@@ -475,31 +475,34 @@ def _add_country_full_names(self, country_code: str, names: Sequence[PlaceName],
         assert self.conn is not None
         word_tokens = set()
         for name in names:
-            norm_name = self._search_normalized(name.name)
-            if norm_name:
-                word_tokens.add(norm_name)
+            norm_name = self._normalized(name.name)
+            token_name = self._search_normalized(name.name)
+            if norm_name and token_name:
+                word_tokens.add((token_name, norm_name))
 
         with self.conn.cursor() as cur:
             # Get existing names
-            cur.execute("""SELECT word_token, coalesce(info ? 'internal', false) as is_internal
+            cur.execute("""SELECT word_token,
+                                  word as lookup,
+                                  coalesce(info ? 'internal', false) as is_internal
                              FROM word
-                            WHERE type = 'C' and word = %s""",
+                            WHERE type = 'C' and info->>'cc' = %s""",
                         (country_code, ))
             # internal/external names
-            existing_tokens: Dict[bool, Set[str]] = {True: set(), False: set()}
+            existing_tokens: Dict[bool, Set[Tuple[str, str]]] = {True: set(), False: set()}
             for word in cur:
-                existing_tokens[word[1]].add(word[0])
+                existing_tokens[word[2]].add((word[0], word[1]))
 
             # Delete names that no longer exist.
             gone_tokens = existing_tokens[internal] - word_tokens
             if internal:
                 gone_tokens.update(existing_tokens[False] & word_tokens)
             if gone_tokens:
                 cur.execute("""DELETE FROM word
-                                     USING unnest(%s::text[]) as token
-                               WHERE type = 'C' and word = %s
-                                     and word_token = token""",
-                            (list(gone_tokens), country_code))
+                                     USING jsonb_array_elements(%s) as data
+                               WHERE type = 'C' and info->>'cc' = %s
+                                     and word_token = data->>0 and word = data->>1""",
+                            (Jsonb(list(gone_tokens)), country_code))
 
             # Only add those names that are not yet in the list.
             new_tokens = word_tokens - existing_tokens[True]
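The DELETE above now has to match complete (word_token, word) pairs rather than bare tokens, which is why the parameter is passed as a JSONB array of two-element arrays instead of a `text[]`. A minimal sketch of that round trip, assuming psycopg 3 (whose `Jsonb` wrapper serializes Python tuples as JSON arrays) and a hypothetical connection string:

```python
# Sketch only: shows how a Jsonb-wrapped list of (word_token, word) pairs
# is unpacked server-side by jsonb_array_elements(). The DSN is hypothetical.
import psycopg
from psycopg.types.json import Jsonb

pairs = [('berlin', 'Berlin'), ('munchen', 'München')]  # (word_token, word)

with psycopg.connect('dbname=nominatim') as conn:
    with conn.cursor() as cur:
        # Each element of the JSONB array is one pair; ->>0 and ->>1
        # extract the token and the lookup name as text.
        cur.execute("""SELECT data->>0, data->>1
                         FROM jsonb_array_elements(%s) as data""",
                    (Jsonb(pairs),))
        for token, name in cur:
            print(token, name)
```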
@@ -508,15 +511,17 @@ def _add_country_full_names(self, country_code: str, names: Sequence[PlaceName],
         if new_tokens:
             if internal:
                 sql = """INSERT INTO word (word_token, type, word, info)
-                           (SELECT token, 'C', %s, '{"internal": "yes"}'
-                              FROM unnest(%s::text[]) as token)
+                           (SELECT data->>0, 'C', data->>1,
+                                   jsonb_build_object('internal', 'yes', 'cc', %s::text)
+                              FROM jsonb_array_elements(%s) as data)
                       """
             else:
-                sql = """INSERT INTO word (word_token, type, word)
-                           (SELECT token, 'C', %s
-                              FROM unnest(%s::text[]) as token)
+                sql = """INSERT INTO word (word_token, type, word, info)
+                           (SELECT data->>0, 'C', data->>1,
+                                   jsonb_build_object('cc', %s::text)
+                              FROM jsonb_array_elements(%s) as data)
                       """
-            cur.execute(sql, (country_code, list(new_tokens)))
+            cur.execute(sql, (country_code, Jsonb(list(new_tokens))))
 
     def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
         """ Determine tokenizer information about the given place.
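With this change the country code moves out of the `word` column into the `info` JSONB under the `cc` key, built with `jsonb_build_object`, which frees `word` to carry the human-readable lookup name. Reusing `cur` from the sketch above, a hypothetical peek at the row the internal-names INSERT would build for one pair:

```python
# Sketch only: the row produced by the INSERT's inner SELECT for a single
# (token, name) pair. psycopg 3 decodes the jsonb column to a dict, and
# jsonb normalizes key order, so 'cc' appears before 'internal'.
cur.execute("""SELECT data->>0, 'C', data->>1,
                      jsonb_build_object('internal', 'yes', 'cc', %s::text)
                 FROM jsonb_array_elements(%s) as data""",
            ('de', Jsonb([('berlin', 'Berlin')])))
print(cur.fetchone())
# -> ('berlin', 'C', 'Berlin', {'cc': 'de', 'internal': 'yes'})
```

Since two entries for the same country may now share a `word_token` while differing in `word`, the DELETE matches on both columns, and the existing-names query returns `word` alongside `word_token`.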