52 lines
2.4 KiB
Python
52 lines
2.4 KiB
Python
from collections import defaultdict
|
|
|
|
|
|
def parse_entities(entities):
    """Merge sub-word NER tokens into whole words, grouped by entity label.

    Each item of *entities* is a mapping with at least the keys ``'word'``
    and ``'entity'``.  A ``'word'`` that starts with ``"▁"`` (presumably the
    SentencePiece word-boundary marker) opens a new word; any other piece is
    appended to the word currently being built.  A finished word is stored
    under the ``'entity'`` label of the token that opened it; the labels of
    continuation pieces are ignored.

    Edge cases:
      * An empty *entities* sequence yields an empty mapping.
      * If the sequence begins with continuation pieces (no ``"▁"`` token has
        been seen yet), the first such piece opens a word under its own label
        instead of being dropped or filed under ``None``.
      * Tokens whose ``'word'`` is an empty string are skipped.

    Args:
        entities: Iterable of token dicts, in sentence order.

    Returns:
        A ``defaultdict(list)`` mapping each entity label to the list of
        reconstructed words, in order of appearance.

    >>> parse_entities([{'entity': 'B-weather_descriptor', 'score': 0.76314837, 'index': 4, 'word': '▁température', 'start': 15, 'end': 26}])
    defaultdict(<class 'list'>, {'B-weather_descriptor': ['température']})

    >>> parse_entities([
    ...     {'entity': 'B-weather_descriptor', 'score': 0.98460984, 'index': 8, 'word': '▁ple', 'start': 16, 'end': 19},
    ...     {'entity': 'B-weather_descriptor', 'score': 0.98146856, 'index': 9, 'word': 'u', 'start': 19, 'end': 20},
    ...     {'entity': 'B-weather_descriptor', 'score': 0.97976905, 'index': 10, 'word': 'voir', 'start': 20, 'end': 24},
    ...     {'entity': 'B-date', 'score': 0.9455722, 'index': 12, 'word': '▁de', 'start': 28, 'end': 30},
    ...     {'entity': 'B-date', 'score': 0.9530212, 'index': 13, 'word': 'main', 'start': 30, 'end': 34}])
    defaultdict(<class 'list'>, {'B-weather_descriptor': ['pleuvoir'], 'B-date': ['demain']})

    >>> parse_entities([
    ...     {'entity': 'B-weather_descriptor', 'score': 0.9845413, 'index': 8, 'word': '▁ple', 'start': 16, 'end': 19},
    ...     {'entity': 'B-weather_descriptor', 'score': 0.9849722, 'index': 9, 'word': 'u', 'start': 19, 'end': 20},
    ...     {'entity': 'B-weather_descriptor', 'score': 0.97936, 'index': 10, 'word': 'voir', 'start': 20, 'end': 24},
    ...     {'entity': 'B-date', 'score': 0.9811183, 'index': 11, 'word': '▁de', 'start': 25, 'end': 27},
    ...     {'entity': 'B-date', 'score': 0.9860088, 'index': 12, 'word': 'main', 'start': 27, 'end': 31},
    ...     {'entity': 'B-date', 'score': 0.8544976, 'index': 14, 'word': '▁lundi', 'start': 35, 'end': 40}])
    defaultdict(<class 'list'>, {'B-weather_descriptor': ['pleuvoir'], 'B-date': ['demain', 'lundi']})

    >>> parse_entities([])
    defaultdict(<class 'list'>, {})

    >>> parse_entities([{'entity': 'B-date', 'word': 'main'}])
    defaultdict(<class 'list'>, {'B-date': ['main']})
    """
    word_start = "▁"

    entities_dict = defaultdict(list)
    current_type = None
    current_word = ""
    # Explicit flag rather than "current_type is None" so that the final
    # flush is skipped when nothing was ever opened (empty input).
    word_open = False

    for e in entities:
        piece = e['word']
        if not piece:
            # Nothing to contribute; also avoids indexing into an empty string.
            continue

        if piece.startswith(word_start) or not word_open:
            # A new word begins: store the one built so far, if any.
            if word_open:
                entities_dict[current_type].append(current_word)
            current_type = e['entity']
            current_word = piece.strip(word_start)
            word_open = True
        else:
            # Continuation piece: glue it onto the word in progress.
            current_word += piece.strip(word_start)

    # Flush the trailing word, which no later word-start token closed.
    if word_open:
        entities_dict[current_type].append(current_word)
    return entities_dict
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod

    testmod()
|
|
|
|
|