"""
High-level feed-related stuff.

See resource.feed for low-level stuff not primarily related to sites.
"""

from datetime import datetime
from typing import Optional

from ..models import Feed
from ..resource import store_feed_entries, update_feed


async def store_new_feeds(conn, site_id, feeds: dict):
    """
    Store new feeds in table site_feed.

    Every key of *feeds* is treated as a feed URL; the values are not used.
    A URL that is not among the URLs already stored in site_feed for
    *site_id* is wrapped in a new Feed (with that site_id and url) and
    saved through Feed.save on *conn*. URLs that are already stored are
    left untouched.

    Returns None.
    """
    sql = "SELECT array_agg(url) FROM site_feed WHERE site_id=$1"
    # fetchval may yield None (array_agg over zero rows is NULL), hence the
    # fallback to an empty list. A set gives constant-time membership tests.
    known_feeds = set((await conn.fetchval(sql, site_id)) or [])
    for feed_url in feeds:
        if feed_url not in known_feeds:
            feed = Feed(
                site_id=site_id,
                url=feed_url,
            )
            await feed.save(conn)


async def get_feeds(conn, site_id) -> list[Feed]:
    """
    Return stored feeds for the given site.

    Selects all rows of table site_feed whose site_id equals *site_id* and
    loads each row into a fresh Feed via Feed.load_from_row. The returned
    list holds whatever load_from_row returns for each row, in the order
    the rows were fetched; it is empty when no row matches.
    """
    sql = "SELECT * FROM site_feed WHERE site_id=$1"
    # Fall back to an empty list if the query result is falsy.
    rows = (await conn.fetch(sql, site_id)) or []
    return [(await Feed().load_from_row(row)) for row in rows]


async def fetch_feeds(fetcher, conn, site) -> Optional[datetime]:
    """
    Fetch feeds, add new resources and return the latest content update time.

    Loads the feeds stored for *site* (looked up by site.id_), runs
    update_feed on each of them and hands every truthy result to
    store_feed_entries.

    Returns the greatest truthy t_content found on the feeds after they
    have been updated, or None if no feed has one.
    """
    feeds = await get_feeds(conn, site.id_)
    latest = None
    for feed in feeds:
        feed_content = await update_feed(fetcher, feed, conn)
        if feed_content:
            await store_feed_entries(conn, site, feed_content)
        # NOTE(review): the original indentation was lost; this check is
        # assumed to run for every feed, not only for feeds that returned
        # content -- confirm against the upstream file.
        if feed.t_content:
            if latest is None:
                latest = feed.t_content
            else:
                latest = max(latest, feed.t_content)
    return latest


if __name__ == '__main__':
    # only use this on a dev instance!
    #
    # Manual test run: expects the URL of a site as the first command line
    # argument. The relative imports below require running this file as a
    # module of its package.
    import asyncio
    import logging
    import sys

    import aiohttp

    from ..config import Config
    from ..db import PGPool
    from ..resource.fetch import ResourceFetcher
    from .operations import process_site, update_site

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    config = Config().get()
    if len(sys.argv) < 2:
        # Exit with a readable message instead of a bare IndexError.
        sys.exit('usage: pass the URL of the site as the first argument')
    url = sys.argv[1]

    async def run():
        """
        Fetch and display a site.

        Updates the site for the given URL, logs it, processes it, then
        fetches its feeds and logs the latest content update time.
        """
        app = None  # TODO
        async with PGPool(config['postgresql']) as pool:
            async with pool.acquire() as conn:
                async with aiohttp.ClientSession() as session:
                    fetcher = ResourceFetcher(session)
                    # The second element of the result is not needed here.
                    site, _ = await update_site(app, fetcher, conn, url)
                    logger.warning(site)
                    await process_site(fetcher, conn, site)
                    latest = await fetch_feeds(fetcher, conn, site)
                    logger.warning('latest: %s', latest)
                    # feed = Feed(url=url)
                    # feed_content = await update_feed(fetcher, feed, conn)
                    # if isinstance(feed_content, ResourceError):
                    #     print(feed_content)
                    # else:
                    #     print(feed)
                    #     pprint(feed_content[0])
                    # print('---- 2nd try ----')
                    # feed_content = await update_feed(fetcher, feed, conn)
                    # if isinstance(feed_content, ResourceError):
                    #     print(feed_content)
                    # else:
                    #     print(feed)
                    #     pprint(feed_content[0])

    asyncio.run(run())