Bump version & CHANGELOG [run doc]
maziyarpanahi committed Sep 25, 2023
1 parent da1070f commit ca4e3e9
Showing 19 changed files with 156 additions and 143 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG
@@ -1,3 +1,14 @@
========
5.1.2
========
----------------
New Features & Enhancements
----------------
* **NEW:** Introducing the VisionEncoderDecoder annotator to generate captions from images (see the sketch below)
* Add missing entries in the docs and update them with the new features
* Improve beam search results in the BART Transformer
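
A minimal sketch of how the new annotator can be wired into a pipeline, assuming it follows the standard `pretrained()` pattern; the class and column names reflect the 5.1.2 Python API, while the image path and default model are illustrative:

```python
import sparknlp
from sparknlp.base import ImageAssembler
from sparknlp.annotator import VisionEncoderDecoderForImageCaptioning
from pyspark.ml import Pipeline

spark = sparknlp.start()

# Load images into a Spark DataFrame using Spark's built-in image source
image_df = spark.read.format("image").load("path/to/images")

image_assembler = ImageAssembler() \
    .setInputCol("image") \
    .setOutputCol("image_assembler")

# pretrained() downloads a default captioning model; pass a name to pick another
captioning = VisionEncoderDecoderForImageCaptioning.pretrained() \
    .setInputCols(["image_assembler"]) \
    .setOutputCol("caption")

pipeline = Pipeline(stages=[image_assembler, captioning])
result = pipeline.fit(image_df).transform(image_df)
result.selectExpr("explode(caption.result) AS caption").show(truncate=False)
```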


========
5.1.1
========
97 changes: 49 additions & 48 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion build.sbt
@@ -6,7 +6,7 @@ name := getPackageName(is_silicon, is_gpu, is_aarch64)

organization := "com.johnsnowlabs.nlp"

version := "5.1.1"
version := "5.1.2"

(ThisBuild / scalaVersion) := scalaVer

6 changes: 3 additions & 3 deletions docs/_layouts/landing.html
@@ -201,7 +201,7 @@ <h3 class="grey h3_title">{{ _section.title }}</h3>
<div class="highlight-box">
{% highlight bash %}
# Using PyPI
$ pip install spark-nlp==5.1.1
$ pip install spark-nlp==5.1.2

# Using Anaconda/Conda
$ conda install -c johnsnowlabs spark-nlp
@@ -339,8 +339,8 @@ <h4 class="blue h4_title">NLP Features</h4>
<li>Easy <strong>ONNX</strong> and <strong>TensorFlow</strong> integrations</li>
<li><strong>GPU</strong> Support</li>
<li>Full integration with <strong>Spark ML</strong> functions</li>
<li><strong>12000+</strong> pre-trained <strong>models </strong> in <strong>200+ languages! </strong>
<li><strong>5000+</strong> pre-trained <strong>pipelines </strong> in <strong>200+ languages! </strong>
<li><strong>15000+</strong> pre-trained <strong>models </strong> in <strong>200+ languages! </strong>
<li><strong>5800+</strong> pre-trained <strong>pipelines </strong> in <strong>200+ languages! </strong>
</ul>
</div>
{% highlight python %}
2 changes: 1 addition & 1 deletion docs/en/concepts.md
@@ -66,7 +66,7 @@ $ java -version
$ conda create -n sparknlp python=3.7 -y
$ conda activate sparknlp
# spark-nlp by default is based on pyspark 3.x
$ pip install spark-nlp==5.1.1 pyspark==3.3.1 jupyter
$ pip install spark-nlp==5.1.2 pyspark==3.3.1 jupyter
$ jupyter notebook
```

4 changes: 2 additions & 2 deletions docs/en/examples.md
@@ -18,7 +18,7 @@ $ java -version
# should be Java 8 (Oracle or OpenJDK)
$ conda create -n sparknlp python=3.7 -y
$ conda activate sparknlp
$ pip install spark-nlp==5.1.1 pyspark==3.3.1
$ pip install spark-nlp==5.1.2 pyspark==3.3.1
```

</div><div class="h3-box" markdown="1">
@@ -40,7 +40,7 @@ This script comes with two options to define `pyspark` and `spark-nlp` versions
# -p is for pyspark
# -s is for spark-nlp
# by default they are set to the latest
!bash colab.sh -p 3.2.3 -s 5.1.1
!bash colab.sh -p 3.2.3 -s 5.1.2
```

[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb) is a live demo on Google Colab that performs named entity recognition and sentiment analysis using Spark NLP pretrained pipelines.
2 changes: 1 addition & 1 deletion docs/en/hardware_acceleration.md
@@ -49,7 +49,7 @@ Since the new Transformer models such as BERT for Word and Sentence embeddings are
| DeBERTa Large | +477%(5.8x) |
| Longformer Base | +52%(1.5x) |

Spark NLP 5.1.1 is built with TensorFlow 2.7.1; the following NVIDIA® software is required only for GPU support:
Spark NLP 5.1.2 is built with TensorFlow 2.7.1; the following NVIDIA® software is required only for GPU support:

- NVIDIA® GPU drivers version 450.80.02 or higher
- CUDA® Toolkit 11.2
54 changes: 27 additions & 27 deletions docs/en/install.md
@@ -17,22 +17,22 @@ sidebar:

```bash
# Install Spark NLP from PyPI
pip install spark-nlp==5.1.1
pip install spark-nlp==5.1.2

# Install Spark NLP from Anaconda/Conda
conda install -c johnsnowlabs spark-nlp

# Load Spark NLP with Spark Shell
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.2

# Load Spark NLP with PySpark
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.2

# Load Spark NLP with Spark Submit
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.2

# Load Spark NLP as external JAR after compiling and building Spark NLP by `sbt assembly`
spark-shell --jars spark-nlp-assembly-5.1.1.jar
spark-shell --jars spark-nlp-assembly-5.1.2.jar
```

</div><div class="h3-box" markdown="1">
Expand All @@ -55,7 +55,7 @@ $ java -version
# should be Java 8 (Oracle or OpenJDK)
$ conda create -n sparknlp python=3.8 -y
$ conda activate sparknlp
$ pip install spark-nlp==5.1.1 pyspark==3.3.1
$ pip install spark-nlp==5.1.2 pyspark==3.3.1
```

Of course, you will need to have Jupyter installed on your system:
@@ -83,7 +83,7 @@ spark = SparkSession.builder \
.config("spark.driver.memory","16G")\
.config("spark.driver.maxResultSize", "0") \
.config("spark.kryoserializer.buffer.max", "2000M")\
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")\
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.2")\
.getOrCreate()
```
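
Alternatively, the `sparknlp.start()` helper used later in these docs builds an equivalent session; a minimal sketch, assuming the defaults match the configuration above:

```python
import sparknlp

# Convenience starter: resolves the matching spark-nlp package and applies
# sensible defaults (driver memory, Kryo serializer settings)
spark = sparknlp.start()
print(sparknlp.version())  # expected to print 5.1.2
```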

@@ -100,7 +100,7 @@ spark = SparkSession.builder \
<dependency>
<groupId>com.johnsnowlabs.nlp</groupId>
<artifactId>spark-nlp_2.12</artifactId>
<version>5.1.1</version>
<version>5.1.2</version>
</dependency>
```

@@ -111,7 +111,7 @@ spark = SparkSession.builder \
<dependency>
<groupId>com.johnsnowlabs.nlp</groupId>
<artifactId>spark-nlp-gpu_2.12</artifactId>
<version>5.1.1</version>
<version>5.1.2</version>
</dependency>
```

@@ -122,7 +122,7 @@ spark = SparkSession.builder \
<dependency>
<groupId>com.johnsnowlabs.nlp</groupId>
<artifactId>spark-nlp-silicon_2.12</artifactId>
<version>5.1.1</version>
<version>5.1.2</version>
</dependency>
```

@@ -133,7 +133,7 @@ spark = SparkSession.builder \
<dependency>
<groupId>com.johnsnowlabs.nlp</groupId>
<artifactId>spark-nlp-aarch64_2.12</artifactId>
<version>5.1.1</version>
<version>5.1.2</version>
</dependency>
```

@@ -145,28 +145,28 @@ spark = SparkSession.builder \

```scala
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.1"
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.2"
```

**spark-nlp-gpu:**

```scala
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.1"
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.2"
```

**spark-nlp-silicon:**

```scala
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.1"
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.2"
```

**spark-nlp-aarch64:**

```scala
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.1"
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.2"
```

Maven Central: [https://mvnrepository.com/artifact/com.johnsnowlabs.nlp](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp)
@@ -248,15 +248,15 @@ maven coordinates like these:
<dependency>
<groupId>com.johnsnowlabs.nlp</groupId>
<artifactId>spark-nlp-silicon_2.12</artifactId>
<version>5.1.1</version>
<version>5.1.2</version>
</dependency>
```

or in case of sbt:

```scala
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.1"
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.2"
```

If everything went well, you can now start Spark NLP with the `apple_silicon` flag set to `True`:
@@ -293,7 +293,7 @@ spark = sparknlp.start(apple_silicon=True)

## Installation for Linux Aarch64 Systems

Starting from version 5.1.1, Spark NLP supports Linux systems running on an aarch64
Starting from version 5.1.2, Spark NLP supports Linux systems running on an aarch64
processor architecture. The necessary dependencies were built on Ubuntu 16.04, so the
target system environment needs to be at least that recent.

@@ -341,7 +341,7 @@ This script comes with two options to define `pyspark` and `spark-nlp` versions
# -p is for pyspark
# -s is for spark-nlp
# by default they are set to the latest
!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
!wget http://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.2
```

[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb) is a live demo on Google Colab that performs named entity recognition and sentiment analysis using Spark NLP pretrained pipelines.
@@ -363,7 +363,7 @@ Run the following code in a Kaggle Kernel and start using spark-nlp right away.

## Databricks Support

Spark NLP 5.1.1 has been tested and is compatible with the following runtimes:
Spark NLP 5.1.2 has been tested and is compatible with the following runtimes:

**CPU:**

@@ -439,7 +439,7 @@ Spark NLP 5.1.1 has been tested and is compatible with the following runtimes:
3.1. Install New -> PyPI -> `spark-nlp` -> Install
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1` -> Install
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.2` -> Install
4. Now you can attach your notebook to the cluster and use Spark NLP!
@@ -459,7 +459,7 @@ Note: You can import these notebooks by using their URLs.

## EMR Support

Spark NLP 5.1.1 has been tested and is compatible with the following EMR releases:
Spark NLP 5.1.2 has been tested and is compatible with the following EMR releases:

- emr-6.2.0
- emr-6.3.0
@@ -518,7 +518,7 @@ A sample of your software configuration in JSON on S3 (must be publicly accessible):
"spark.kryoserializer.buffer.max": "2000M",
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
"spark.driver.maxResultSize": "0",
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1"
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.2"
}
}
]
@@ -528,7 +528,7 @@ A sample AWS CLI command to launch an EMR cluster:

```sh
aws emr create-cluster \
--name "Spark NLP 5.1.1" \
--name "Spark NLP 5.1.2" \
--release-label emr-6.2.0 \
--applications Name=Hadoop Name=Spark Name=Hive \
--instance-type m4.4xlarge \
@@ -793,7 +793,7 @@ We recommend using `conda` to manage your Python environment on Windows.
Now you can use the downloaded binary by navigating to `%SPARK_HOME%\bin` and
running
Either create a conda env for Python 3.6, install *pyspark==3.3.1 spark-nlp numpy*, and use the Jupyter/Python console, or, in the same conda env, go to the Spark bin directory and run *pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1*.
Either create a conda env for Python 3.6, install *pyspark==3.3.1 spark-nlp numpy*, and use the Jupyter/Python console, or, in the same conda env, go to the Spark bin directory and run *pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.2*.
<img class="image image--xl" src="/assets/images/installation/90126972-c03e5500-dd64-11ea-8285-e4f76aa9e543.jpg" style="width:100%; align:center; box-shadow: 0 3px 6px rgba(0,0,0,0.16), 0 3px 6px rgba(0,0,0,0.23);"/>
@@ -821,12 +821,12 @@ spark = SparkSession.builder \
.config("spark.driver.memory","16G")\
.config("spark.driver.maxResultSize", "0") \
.config("spark.kryoserializer.buffer.max", "2000M")\
.config("spark.jars", "/tmp/spark-nlp-assembly-5.1.1.jar")\
.config("spark.jars", "/tmp/spark-nlp-assembly-5.1.2.jar")\
.getOrCreate()
```
- You can download the provided Fat JARs from each [release notes](https://github.com/JohnSnowLabs/spark-nlp/releases); make sure to pick the one that suits your environment depending on the device (CPU/GPU) and Apache Spark version (3.x)
- If you are running locally, you can load the Fat JAR from your local FileSystem; however, in a cluster setup you need to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (e.g., `hdfs:///tmp/spark-nlp-assembly-5.1.1.jar`)
- If you are running locally, you can load the Fat JAR from your local FileSystem; however, in a cluster setup you need to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (e.g., `hdfs:///tmp/spark-nlp-assembly-5.1.2.jar`)
Example of using pretrained Models and Pipelines offline:
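
A minimal sketch of what that typically looks like, assuming the model files were already downloaded and unzipped locally; the annotator choice, model name, and path here are illustrative, not a specific release artifact:

```python
from sparknlp.annotator import NerDLModel

# Load a pretrained model from a local (or distributed) path instead of
# downloading it at runtime; useful in air-gapped environments
ner_model = NerDLModel.load("/tmp/ner_dl_en_5.1.2/") \
    .setInputCols(["sentence", "token", "embeddings"]) \
    .setOutputCol("ner")
```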
2 changes: 1 addition & 1 deletion docs/en/spark_nlp.md
@@ -25,7 +25,7 @@ Spark NLP is built on top of **Apache Spark 3.x**. To use Spark NLP you need:

**GPU (optional):**

Spark NLP 5.1.1 is built with TensorFlow 2.7.1; the following NVIDIA® software is required only for GPU support:
Spark NLP 5.1.2 is built with TensorFlow 2.7.1; the following NVIDIA® software is required only for GPU support:

- NVIDIA® GPU drivers version 450.80.02 or higher
- CUDA® Toolkit 11.2
4 changes: 2 additions & 2 deletions docs/learn.md
@@ -4,7 +4,7 @@ title: Learn
permalink: /learnold
key: videos
aside:
toc: true
toc: true
license: false
show_date: true
modify_date: "2019-05-16"
@@ -38,7 +38,7 @@ modify_date: "2019-05-16"
</div></div>

<div class="cell cell--12 cell--lg-4 cell--sm-12"><div class="article-item" markdown="1">
<span class="article-inner">![News](/assets/images/Article_1.jpg)[Improving Clinical Document Understanding on COVID-19 Research with Spark NLP](https://arxiv.org/abs/2012.04005?__hstc=162627875.7e2e14acbce8f704860db3c9a9f86d5f.1608308444598.1611854965224.1612279175672.32&__hssc=162627875.1.1612279175672&__hsfp=1326107387&hsCtaTracking=5ad1bb2e-ee9e-4ada-97a2-06fabb0f15ac%7C573cdf1c-4538-4a05-ad78-8cf40795d21f)</span><span class="video-descr">Veysel Kocaman, David Talby - 7 December, 2020</span>
<span class="article-inner">![News](/assets/images/Article_1.jpg)[Improving Clinical Document Understanding on COVID-19 Research with Spark NLP](https://arxiv.org/abs/2012.04005?__hstc=162627875.7e2e14acbce8f704860db3c9a9f86d5f.1608308444598.1611854965224.1612279175672.32&__hssc=162627875.1.2612279175672&__hsfp=1326107387&hsCtaTracking=5ad1bb2e-ee9e-4ada-97a2-06fabb0f15ac%7C573cdf1c-4538-4a05-ad78-8cf40795d21f)</span><span class="video-descr">Veysel Kocaman, David Talby - 7 December, 2020</span>
</div></div>

<div class="cell cell--12 cell--lg-4 cell--sm-12"><div class="article-item" markdown="1">
