2121
2222import java .io .ByteArrayInputStream ;
2323import java .nio .charset .StandardCharsets ;
24+ import java .util .Collections ;
2425import java .util .List ;
2526import java .util .Set ;
2627
@@ -116,7 +117,7 @@ public void testMetadataFactoryFieldsConfig() throws Exception {
116117 @ Test
117118 public void testKeySizeFilter () throws Exception {
118119 Metadata metadata = filter (10 , 1000 , 10000 , 100 ,
119- null , true );
120+ Collections . EMPTY_SET , Collections . EMPTY_SET , true );
120121 //test that must add keys are not truncated
121122 metadata .add (TikaCoreProperties .TIKA_PARSED_BY , "some-long-parser1" );
122123 metadata .add (TikaCoreProperties .TIKA_PARSED_BY , "some-long-parser2" );
@@ -138,13 +139,13 @@ public void testAfterMaxHit() throws Exception {
138139 String k = "dc:creator" ;//20 bytes
139140 //key is > maxTotalBytes, so the value isn't even added
140141 Metadata metadata = filter (100 , 10000 , 10 ,
141- 100 , null , false );
142+ 100 , Collections . EMPTY_SET , Collections . EMPTY_SET , false );
142143 metadata .set (k , "ab" );
143144 assertEquals (1 , metadata .names ().length );
144145 assertEquals ("true" , metadata .get (TikaCoreProperties .TRUNCATED_METADATA ));
145146
146147 metadata = filter (100 , 10000 , 50 , 100 ,
147- null , false );
148+ Collections . EMPTY_SET , Collections . EMPTY_SET , false );
148149 for (int i = 0 ; i < 10 ; i ++) {
149150 metadata .set (k , "abcde" );
150151 }
@@ -178,7 +179,8 @@ public void testAfterMaxHit() throws Exception {
178179 @ Test
179180 public void testMinSizeForAlwaysInclude () throws Exception {
180181 //test that mimes don't get truncated
181- Metadata metadata = filter (100 , 10 , 10000 , 100 , null , true );
182+ Metadata metadata = filter (100 , 10 , 10000 , 100 ,
183+ Collections .EMPTY_SET , Collections .EMPTY_SET , true );
182184
183185 String mime = getLongestMime ().toString ();
184186 metadata .set (Metadata .CONTENT_TYPE , mime );
@@ -192,21 +194,47 @@ public void testMinSizeForAlwaysInclude() throws Exception {
192194
193195 @ Test
194196 public void testMaxFieldValues () throws Exception {
195- Metadata metadata = filter (100 , 10000 , 10000 , 3 , null , true );
197+ Metadata metadata = filter (100 , 10000 , 10000 , 3 ,
198+ Collections .EMPTY_SET , Collections .EMPTY_SET , true );
196199 for (int i = 0 ; i < 10 ; i ++) {
197200 metadata .add (TikaCoreProperties .SUBJECT , "ab" );
198201 }
199202 assertEquals (3 , metadata .getValues (TikaCoreProperties .SUBJECT ).length );
200203 }
201204
205+ @ Test
206+ public void testExclude () throws Exception {
207+ TikaConfig tikaConfig =
208+ new TikaConfig (TikaConfigTest .class .getResourceAsStream ("TIKA-3695-exclude.xml" ));
209+ AutoDetectParser parser = new AutoDetectParser (tikaConfig );
210+ String mock = "<?xml version=\" 1.0\" encoding=\" UTF-8\" ?>" +
211+ "<mock>" ;
212+ mock += "<metadata action=\" add\" name=\" dc:creator\" >01234567890123456789</metadata>" ;
213+ mock += "<metadata action=\" add\" name=\" subject\" >01234567890123456789</metadata>" ;
214+ mock += "<metadata action=\" add\" name=\" subjectB\" >01234567890123456789</metadata>" ;
215+ mock += "<write element=\" p\" times=\" 1\" > hello </write>\n " ;
216+ mock += "</mock>" ;
217+ Metadata metadata = new Metadata ();
218+ List <Metadata > metadataList =
219+ getRecursiveMetadata (new ByteArrayInputStream (mock .getBytes (StandardCharsets .UTF_8 )),
220+ parser , metadata , new ParseContext (), true );
221+ assertEquals (1 , metadataList .size ());
222+ metadata = metadataList .get (0 );
223+ assertEquals (9 , metadata .names ().length );
224+ assertEquals ("01234567890123456789" , metadata .get ("dc:creator" ));
225+ assertEquals ("01234567890123456789" , metadata .get ("subjectB" ));
226+ assertNull (metadata .get ("subject" ));
227+ }
228+
229+
202230 private void assertTruncated (Metadata metadata ) {
203231 assertEquals ("true" , metadata .get (TikaCoreProperties .TRUNCATED_METADATA ));
204232 }
205233 private Metadata filter (int maxKeySize , int maxFieldSize , int maxTotalBytes ,
206234 int maxValuesPerField ,
207- Set <String > includeFields , boolean includeEmpty ) {
235+ Set <String > includeFields , Set < String > excludeFields , boolean includeEmpty ) {
208236 MetadataWriteFilter filter = new StandardWriteFilter (maxKeySize , maxFieldSize ,
209- maxTotalBytes , maxValuesPerField , includeFields , includeEmpty );
237+ maxTotalBytes , maxValuesPerField , includeFields , excludeFields , includeEmpty );
210238 Metadata metadata = new Metadata ();
211239 metadata .setMetadataWriteFilter (filter );
212240 return metadata ;
0 commit comments