From 554cf14451181f6b5df03a33bb3f3b180f27d483 Mon Sep 17 00:00:00 2001 From: ibu Date: Sun, 2 Jan 2022 14:44:44 +0000 Subject: [PATCH] Add resource length to elasticsearch index --- src/atextcrawler/search/engine.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/atextcrawler/search/engine.py b/src/atextcrawler/search/engine.py index 7a72de6..f029a7b 100644 --- a/src/atextcrawler/search/engine.py +++ b/src/atextcrawler/search/engine.py @@ -53,6 +53,7 @@ properties = { 'time_horizon': {'type': 'keyword'}, 'orig_source': {'type': 'text'}, 'topics': {'type': 'text'}, + 'length': {'type': 'integer'}, 'annotations': {'type': 'text', 'index': False}, 'sections': { 'type': 'nested', @@ -179,6 +180,7 @@ async def index_resource( 'time_horizon': resource.search_fields.get('time_horizon'), 'orig_source': resource.search_fields.get('orig_source'), 'topics': resource.search_fields.get('topics'), + 'length': len(text), 'annotations': pack_annotations(annotations), 'sections': sections, }