Skip to content

Commit 3f52364

Browse files
committed
add scala doc
1 parent 0430d86 commit 3f52364

File tree

4 files changed

+27
-38
lines changed

4 files changed

+27
-38
lines changed

src/main/scala/com/johnsnowlabs/nlp/HasMultipleInputAnnotationCols.scala

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,20 @@
1616

1717
package com.johnsnowlabs.nlp
1818

19-
import com.johnsnowlabs.nlp.AnnotatorType.CHUNK
20-
import org.apache.spark.ml.param.{Params, StringArrayParam}
21-
import org.apache.spark.sql.types.StructType
22-
19+
/**
20+
* Trait used to create annotators with input columns of variable length.
21+
* */
2322
trait HasMultipleInputAnnotationCols extends HasInputAnnotationCols {
2423

24+
/** Annotator type shared by every input column of this annotator. */
2525
val inputAnnotatorType: String
2626

2727
lazy override val inputAnnotatorTypes: Array[String] = getInputCols.map(_ =>inputAnnotatorType)
2828

29+
/**
30+
* Sets the columns that contain the annotations necessary to run this annotator.
31+
* AnnotatorType is the same for all input columns in that annotator.
32+
*/
2933
override def setInputCols(value: Array[String]): this.type = {
3034
set(inputCols, value)
3135
}

src/test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiannotationsSpec.scala renamed to src/test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiAnnotationsSpec.scala

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
package com.johnsnowlabs.nlp.annotators.multipleannotations
22

3-
import com.johnsnowlabs.nlp.annotators.sbd.pragmatic.SentenceDetector
4-
import com.johnsnowlabs.nlp.{ContentProvider, DocumentAssembler, LightPipeline, RecursivePipeline, SparkAccessor}
5-
import com.johnsnowlabs.nlp.annotators.{TextMatcher, Tokenizer}
6-
import com.johnsnowlabs.nlp.util.io.ReadAs
3+
import com.johnsnowlabs.nlp.{DocumentAssembler, LightPipeline, SparkAccessor}
74
import com.johnsnowlabs.tags.FastTest
85
import org.apache.spark.ml.Pipeline
96
import org.scalatest.flatspec.AnyFlatSpec
7+
import com.johnsnowlabs.nlp.Annotation
8+
import org.junit.Assert.assertEquals
109

11-
class MultiannotationsSpec extends AnyFlatSpec {
10+
class MultiAnnotationsSpec extends AnyFlatSpec {
1211
import SparkAccessor.spark.implicits._
1312

1413
"An multiple anootator chunks" should "transform data " taggedAs FastTest in {
15-
val data = SparkAccessor.spark.sparkContext.parallelize(Seq("Example text")).toDS().toDF("text")
14+
val data = SparkAccessor.spark.sparkContext.parallelize(Seq("Example text")).toDS().toDF("text")
1615

1716
val documentAssembler = new DocumentAssembler()
1817
.setInputCol("text")
@@ -26,7 +25,7 @@ class MultiannotationsSpec extends AnyFlatSpec {
2625
.setInputCol("text")
2726
.setOutputCol("document3")
2827

29-
val multipleColumns = new MultiColumnApproach().setInputCols("document","document2","document3").setOutputCol("merge")
28+
val multipleColumns = new MultiColumnApproach().setInputCols("document","document2","document3").setOutputCol("multiple_document")
3029

3130
val pipeline = new Pipeline()
3231
.setStages(Array(
@@ -38,11 +37,13 @@ class MultiannotationsSpec extends AnyFlatSpec {
3837

3938
val pipelineModel = pipeline.fit(data)
4039

41-
pipelineModel.transform(data).show(truncate = false)
40+
val annotations = Annotation.collect(pipelineModel.transform(data),"multiple_document").flatten
41+
assertEquals(annotations.length,3)
4242

4343
val result = new LightPipeline(pipelineModel).annotate("My document")
4444

45-
println(result)
45+
46+
assertEquals(result("multiple_document").size,3)
4647

4748
}
4849

src/test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiColumnApproach.scala

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,17 @@ class MultiColumnApproach(override val uid: String) extends AnnotatorApproach[Mu
1313
override val description: String = "Example multiple columns"
1414

1515
/**
16-
* Input annotator types: CHUNK
16+
* Output annotator type: DOCUMENT
1717
*
18-
* @group anno
1918
*/
2019
override val outputAnnotatorType: AnnotatorType = DOCUMENT
2120
/**
22-
* Output annotator types: CHUNK, CHUNK
21+
* Input annotator type: DOCUMENT
2322
*
24-
* @group anno
2523
*/
2624
override val inputAnnotatorType: AnnotatorType = DOCUMENT
2725

2826

29-
/** whether to merge overlapping matched chunks. Defaults to true
30-
*
31-
* @group param
32-
* */
3327

3428
override def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): MultiColumnsModel = {
3529

src/test/scala/com/johnsnowlabs/nlp/annotators/multipleannotations/MultiColumnsModel.scala

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,28 +11,18 @@ class MultiColumnsModel(override val uid: String) extends AnnotatorModel[MultiCo
1111

1212
def this() = this(Identifiable.randomUID("MERGE"))
1313

14-
1514
/**
16-
* Input annotator types: CHUNK
17-
*
18-
* @group anno
19-
*/
15+
* Output annotator type: DOCUMENT
16+
*
17+
*/
2018
override val outputAnnotatorType: AnnotatorType = DOCUMENT
21-
22-
2319
/**
24-
* Multiple columns
25-
*
26-
* @group anno
27-
*/
20+
* Input annotator type: DOCUMENT
21+
*
22+
*/
23+
override val inputAnnotatorType: AnnotatorType = DOCUMENT
2824

29-
override val inputAnnotatorType: String = DOCUMENT
3025

31-
/**
32-
* Merges columns of chunk Annotations while considering false positives and replacements.
33-
* @param annotations a Sequence of chunks to merge
34-
* @return a Sequence of Merged CHUNK Annotations
35-
*/
3626
override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = {
3727
annotations
3828
}

0 commit comments

Comments
 (0)