JohnSnowLabs · maziyarpanahi · Nov 17, 2021 · Nov 10, 2021 · Nov 10, 2021 · albertoandreottiATgmail
diff --git a/src/main/scala/com/johnsnowlabs/nlp/HasInputAnnotationCols.scala b/src/main/scala/com/johnsnowlabs/nlp/HasInputAnnotationCols.scala
@@ -34,7 +34,7 @@ trait HasInputAnnotationCols extends Params {
     new StringArrayParam(this, "inputCols", "the input annotation columns")
 
   /** Overrides required annotators column if different than default */
-  final def setInputCols(value: Array[String]): this.type = {
+  def setInputCols(value: Array[String]): this.type = {
     require(
       value.length == inputAnnotatorTypes.length,
       s"setInputCols in ${this.uid} expecting ${inputAnnotatorTypes.length} columns. " +

diff --git a/src/main/scala/com/johnsnowlabs/nlp/HasMultipleInputAnnotationCols.scala b/src/main/scala/com/johnsnowlabs/nlp/HasMultipleInputAnnotationCols.scala
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2017-2021 John Snow Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.johnsnowlabs.nlp
+
+/**
+ * Trait used to create annotators that accept a variable number of input columns.
+ * Every input column is expected to carry the same annotator type.
+ */
+trait HasMultipleInputAnnotationCols extends HasInputAnnotationCols {
+
+  /** Annotator type shared by every input column of this annotator. */
+  val inputAnnotatorType: String
+
+  /**
+    * One entry of `inputAnnotatorType` per configured input column.
+    * Being a `lazy val`, it is evaluated once on first access, so input columns
+    * set after that first access are not reflected here.
+    */
+  override lazy val inputAnnotatorTypes: Array[String] =
+    Array.fill(getInputCols.length)(inputAnnotatorType)
+
+  /** Sets the input annotation columns; any number of columns is accepted. */
+  override def setInputCols(value: Array[String]): this.type = set(inputCols, value)
+}
diff --git a/...test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiAnnotationsSpec.scala b/...test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiAnnotationsSpec.scala
@@ -0,0 +1,52 @@
+package com.johnsnowlabs.nlp.annotators.multipleannotations
+
+import com.johnsnowlabs.nlp.{DocumentAssembler, LightPipeline, SparkAccessor}
+import com.johnsnowlabs.tags.FastTest
+import org.apache.spark.ml.Pipeline
+import org.scalatest.flatspec.AnyFlatSpec
+import com.johnsnowlabs.nlp.Annotation
+import org.junit.Assert.assertEquals
+
+/** Exercises an annotator that reads a variable number of input columns. */
+class MultiAnnotationsSpec extends AnyFlatSpec {
+  import SparkAccessor.spark.implicits._
+
+  "An multiple anootator chunks" should "transform data " taggedAs FastTest in {
+    val data = SparkAccessor.spark.sparkContext.parallelize(Seq("Example text")).toDS().toDF("text")
+
+    // Three assemblers read the same text column, each writing its own output column.
+    val documentAssembler = new DocumentAssembler()
+      .setInputCol("text")
+      .setOutputCol("document")
+
+    val documentAssembler2 = new DocumentAssembler()
+      .setInputCol("text")
+      .setOutputCol("document2")
+
+    val documentAssembler3 = new DocumentAssembler()
+      .setInputCol("text")
+      .setOutputCol("document3")
+
+    val multipleColumns = new MultiColumnApproach()
+      .setInputCols("document", "document2", "document3")
+      .setOutputCol("multiple_document")
+
+    val pipeline = new Pipeline()
+      .setStages(Array(
+        documentAssembler,
+        documentAssembler2,
+        documentAssembler3,
+        multipleColumns
+      ))
+
+    val pipelineModel = pipeline.fit(data)
+
+    // JUnit's assertEquals takes (expected, actual), in that order.
+    val annotations = Annotation.collect(pipelineModel.transform(data), "multiple_document").flatten
+    assertEquals(3, annotations.length)
+
+    val result = new LightPipeline(pipelineModel).annotate("My document")
+    assertEquals(3, result("multiple_document").size)
+  }
+
+}
diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiColumnApproach.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiColumnApproach.scala
@@ -0,0 +1,34 @@
+package com.johnsnowlabs.nlp.annotators.multipleannotations
+
+import com.johnsnowlabs.nlp.{AnnotatorApproach, HasMultipleInputAnnotationCols}
+import com.johnsnowlabs.nlp.AnnotatorType.{CHUNK, DOCUMENT}
+import org.apache.spark.ml.PipelineModel
+import org.apache.spark.ml.util.Identifiable
+import org.apache.spark.sql.Dataset
+
+
+/**
+ * Example approach that takes a variable number of DOCUMENT input columns
+ * and trains a [[MultiColumnsModel]] configured with the same columns.
+ */
+class MultiColumnApproach(override val uid: String)
+  extends AnnotatorApproach[MultiColumnsModel]
+    with HasMultipleInputAnnotationCols {
+
+  def this() = this(Identifiable.randomUID("multiplecolums"))
+
+  override val description: String = "Example multiple columns"
+
+  /** Input annotator type, shared by all input columns: DOCUMENT */
+  override val inputAnnotatorType: AnnotatorType = DOCUMENT
+
+  /** Output annotator type: DOCUMENT */
+  override val outputAnnotatorType: AnnotatorType = DOCUMENT
+
+  /** Builds the model, forwarding this approach's input and output columns.
+    * The `dataset` and `recursivePipeline` arguments are not read. */
+  override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): MultiColumnsModel = {
+    val model = new MultiColumnsModel()
+    model.setInputCols($(inputCols)).setOutputCol($(outputCol))
+  }
+}
diff --git a/src/test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiColumnsModel.scala b/src/test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiColumnsModel.scala
@@ -0,0 +1,33 @@
+package com.johnsnowlabs.nlp.annotators.multipleannotations
+
+import com.johnsnowlabs.nlp.AnnotatorType.{CHUNK, DOCUMENT}
+import com.johnsnowlabs.nlp._
+import org.apache.spark.ml.util.Identifiable
+
+
+/**
+ * Example model with a variable number of DOCUMENT input columns.
+ * `annotate` returns the annotations it receives unchanged.
+ */
+class MultiColumnsModel(override val uid: String)
+  extends AnnotatorModel[MultiColumnsModel]
+    with HasMultipleInputAnnotationCols
+    with HasSimpleAnnotate[MultiColumnsModel] {
+
+  def this() = this(Identifiable.randomUID("MERGE"))
+
+  /** Input annotator type, shared by all input columns: DOCUMENT */
+  override val inputAnnotatorType: AnnotatorType = DOCUMENT
+
+  /** Output annotator type: DOCUMENT */
+  override val outputAnnotatorType: AnnotatorType = DOCUMENT
+
+  /**
+    * Identity transformation over the incoming annotations.
+    * @param annotations the annotations to process
+    * @return the same sequence, untouched
+    */
+  override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = annotations
+}
+
+