diff --git a/.gitignore b/.gitignore index 68bc17f..3a2797f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +imdb* + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..311f598 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,54 @@ +asttokens==2.4.1 +Bottleneck @ file:///C:/b/abs_f7un855idq/croot/bottleneck_1709069969633/work +Brotli @ file:///C:/b/abs_3d36mno480/croot/brotli-split_1714483178642/work +certifi @ file:///C:/b/abs_35d7n66oz9/croot/certifi_1707229248467/work/certifi +charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work +colorama==0.4.6 +comm==0.2.2 +debugpy==1.8.1 +decorator==5.1.1 +executing==2.0.1 +idna @ file:///C:/b/abs_aad84bnnw5/croot/idna_1714398896795/work +ipykernel==6.29.4 +ipython==8.24.0 +jedi==0.19.1 +jupyter_client==8.6.1 +jupyter_core==5.7.2 +matplotlib-inline==0.1.7 +mkl-fft @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/mkl_fft_1699473528480/work +mkl-random @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/mkl_random_1699473588250/work +mkl-service==2.4.0 +nest-asyncio==1.6.0 +numexpr @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/numexpr_1699503421264/work +numpy @ file:///C:/b/abs_c1ywpu18ar/croot/numpy_and_numpy_base_1708638681471/work/dist/numpy-1.26.4-cp312-cp312-win_amd64.whl#sha256=becc06674317799ad0165a939a7613809d0bee9bd328a1e4308c57c39cacf08c +packaging==24.0 +pandas @ file:///C:/b/abs_3awk0iw2ab/croot/pandas_1709590545218/work/dist/pandas-2.2.1-cp312-cp312-win_amd64.whl#sha256=8f9837b9f672189b7e2df7dcf64b91243a78b0fad6e1125220b33a39b5c9e598 +parso==0.8.4 +platformdirs==4.2.2 +prompt-toolkit==3.0.43 +psutil==5.9.8 +pure-eval==0.2.2 +py4j @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/py4j_1699562107675/work +pyarrow @ file:///C:/b/abs_93i_y2dub4/croot/pyarrow_1707330894046/work/python +Pygments==2.18.0 +PySocks @ 
file:///C:/Users/dev-admin/perseverance-python-buildout/croot/pysocks_1699473336188/work +pyspark @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/pyspark_1701815041952/work +python-dateutil==2.9.0.post0 +pytz @ file:///C:/b/abs_6ap4tsz1ox/croot/pytz_1713974360290/work +pywin32==306 +pyzmq==26.0.3 +requests @ file:///C:/b/abs_474vaa3x9e/croot/requests_1707355619957/work +rm==2020.12.3 +setuptools==69.5.1 +six==1.16.0 +stack-data==0.6.3 +tornado==6.4 +traitlets==5.14.3 +tzdata @ file:///croot/python-tzdata_1690578112552/work +unzip==1.0.0 +urllib3 @ file:///C:/b/abs_8e4z8_gh1l/croot/urllib3_1715636317140/work +values==2020.12.3 +wcwidth==0.2.13 +wget==3.2 +wheel==0.43.0 +win-inet-pton @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/win_inet_pton_1699472992992/work diff --git a/trabalho_big_data.ipynb b/trabalho_big_data.ipynb index f2a005c..5e50b8b 100644 --- a/trabalho_big_data.ipynb +++ b/trabalho_big_data.ipynb @@ -20,29 +20,43 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!wget https://raw.githubusercontent.com/N-CPUninter/Big_Data/main/data/imdb-reviews-pt-br.zip -O imdb-reviews-pt-br.zip\n", - "!unzip imdb-reviews-pt-br.zip\n", - "!rm imdb-reviews-pt-br.zip" - ] - }, - { - "cell_type": "markdown", + "execution_count": 1, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloaded imdb-reviews-pt-br.zip successfully!\n", + "Extracted files from imdb-reviews-pt-br.zip\n", + "Removed imdb-reviews-pt-br.zip\n" + ] + } + ], "source": [ - "## Instalação manual das dependências para uso do pyspark no Google Colab" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install pyspark" + "import os\n", + "import requests\n", + "import zipfile\n", + "\n", + "url = \"https://raw.githubusercontent.com/N-CPUninter/Big_Data/main/data/imdb-reviews-pt-br.zip\"\n", + 
"filename = \"imdb-reviews-pt-br.zip\"\n", + "\n", + "response = requests.get(url, stream=True)\n", + "\n", + "if response.status_code == 200:\n", + "    with open(filename, \"wb\") as f:\n", + "        for chunk in response.iter_content(1024):\n", + "            f.write(chunk)\n", + "    print(f\"Downloaded {filename} successfully!\")\n", + "else:\n", + "    print(f\"Error downloading file: {response.status_code}\")\n", + "\n", + "with zipfile.ZipFile(filename, 'r') as zip_ref:\n", + "    zip_ref.extractall()\n", + "    print(\"Extracted files from\", filename)\n", + "\n", + "os.remove(filename)\n", + "print(f\"Removed {filename}\")" ] }, { @@ -54,16 +68,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iniciando spark session para Raquel - RU 3803786\n", + "Sessão iniciado com sucesso! 🚀\n" + ] + } + ], "source": [ "from pyspark.sql import SparkSession\n", "\n", - "appName = \"PySpark Trabalho de Big Data\"\n", + "MEU_RU = \"3803786\"\n", + "appName = f\"PySpark Trabalho de Big Data - {MEU_RU}\"\n", "master = \"local\"\n", "\n", - "spark = SparkSession.builder.appName(appName).master(master).getOrCreate()" + "print(f\"Iniciando spark session para Raquel - RU {MEU_RU}\")\n", + "spark: SparkSession = SparkSession.builder.appName(appName).master(master).getOrCreate()\n", + "print(\"Sessão iniciado com sucesso! 
🚀\")" ] }, { @@ -75,14 +101,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "imdb_df = spark.read.csv('imdb-reviews-pt-br.csv', \n", - " header=True, \n", - " quote=\"\\\"\", \n", - " escape=\"\\\"\", \n", + "from pyspark.sql import DataFrame\n", + "from pyspark.sql.types import StructType, StructField, StringType\n", + "\n", + "schema = StructType([\n", + " StructField(\"id\", StringType(), True),\n", + " StructField(\"text_en\", StringType(), True),\n", + " StructField(\"text_pt\", StringType(), True),\n", + " StructField(\"sentiment\", StringType(), True),\n", + "])\n", + "\n", + "\n", + "imdb_df: DataFrame = spark.read.csv('imdb-reviews-pt-br.csv',\n", + " header=True,\n", + " quote=\"\\\"\",\n", + " escape=\"\\\"\",\n", " encoding=\"UTF-8\")" ] }, @@ -103,14 +140,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "def map1(x):\n", - " # Coloque aqui o seu código para retornar a tupla necessária.\n", - " # Apague a linha abaixo para iniciar seu código.\n", - " pass" + "from pyspark.sql import DataFrame\n", + "\n", + "def filter_negative_reviews(data: DataFrame) -> DataFrame:\n", + " MEU_RU = \"3803786\"\n", + " print(f\"Meu RU é {MEU_RU}\")\n", + "\n", + " return data.filter(data[\"sentiment\"] == \"neg\")" ] }, { @@ -124,14 +164,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "def reduceByKey1(x,y):\n", - " # Coloque aqui o seu código para retornar o resultado necessário.\n", - " # Apague a linha abaixo para iniciar seu código.\n", - " pass" + "from pyspark.sql import DataFrame\n", + "from pyspark.sql.functions import col, sum\n", + "\n", + "def sum_negative_ids(reviews: DataFrame) -> DataFrame:\n", + " MEU_RU = \"3803786\"\n", + " print(f\"Lembrando que meu RU é {MEU_RU}\")\n", + "\n", + " return reviews.withColumn(\"id\", 
col(\"id\").cast(\"int\")).select(sum(\"id\"))" ] }, { @@ -143,12 +187,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Meu RU é 3803786\n", + "Lembrando que meu RU é 3803786\n", + "Soma de IDs das reviews negativas: 459568555\n" + ] + } + ], "source": [ - "# Coloque aqui a sua linha de código para aplicar o map/reduce no seu \n", - "# dataframe spark e realize o collect() ao final para visualizar os dados.\n" + "negative_reviews = filter_negative_reviews(imdb_df)\n", + "sum_of_negative_ids = sum_negative_ids(negative_reviews).collect()[0][0]\n", + "\n", + "print(f\"Soma de IDs das reviews negativas: {sum_of_negative_ids}\")" ] }, { @@ -168,14 +224,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "def map2(x):\n", - " # Coloque aqui o seu código para retornar a tupla necessária.\n", - " # Apague a linha abaixo para iniciar seu código.\n", - " pass" + "from pyspark.sql import DataFrame\n", + "from pyspark.sql.functions import col, split, size\n", + "\n", + "def map_sentiment_to_word_count(data: DataFrame) -> DataFrame:\n", + " MEU_RU = \"3803786\"\n", + " print(f\"Oi! 
Sou Raquel e meu RU é {MEU_RU}\")\n", + "\n", + " return data.select(col(\"sentiment\"), size(split(col(\"text_en\"), \"\\\\s+\")).alias(\"text_en_word_count\"), size(split(col(\"text_pt\"), \"\\\\s+\")).alias(\"text_pt_word_count\"))" ] }, { @@ -189,14 +249,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "def reduceByKey2(x,y):\n", - " # Coloque aqui o seu código para retornar o resultado necessário.\n", - " # Apague a linha abaixo para iniciar seu código.\n", - " pass" + "from pyspark.sql import DataFrame\n", + "\n", + "def reduce_word_count_by_sentiment(sentiment_word_counts: DataFrame) -> DataFrame:\n", + " MEU_RU = \"3803786\"\n", + " print(f\"Já falei que meu RU é {MEU_RU}?\")\n", + "\n", + " return sentiment_word_counts.groupBy(\"sentiment\").agg(sum(\"text_en_word_count\").alias(\"total_text_en_words\"), sum(\"text_pt_word_count\").alias(\"total_text_pt_words\"))\n" ] }, { @@ -212,17 +275,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Meu RU é 3803786\n", + "Oi! 
Sou Raquel e meu RU é 3803786\n", + "Já falei que meu RU é 3803786?\n", + "Diferença entre a contagem de palavras: 54976 (Texto em PT - Texto em EN)\n" + ] + } + ], "source": [ - "# Coloque aqui suas linhas de código final\n" + "negative_data = filter_negative_reviews(imdb_df)\n", + "\n", + "sentiment_word_counts = map_sentiment_to_word_count(negative_data)\n", + "total_word_counts = reduce_word_count_by_sentiment(sentiment_word_counts)\n", + "result = total_word_counts.collect()[0]\n", + "word_count_difference = result[2] - result[1]\n", + "\n", + "print(f\"Diferença entre a contagem de palavras: {word_count_difference} (Texto em PT - Texto em EN)\")\n" ] } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" }, "orig_nbformat": 4 },