Source code for pelican.util.codelists
import csv
import logging
import time
from collections import defaultdict
from io import StringIO
import cachetools.func
import requests
logger = logging.getLogger(__name__)
@cachetools.func.ttl_cache(ttl=86400)  # 86400 seconds = 1 day
def _get(url: str) -> list[dict[str, str]]:
    """
    Download the CSV file at ``url`` and return its rows as a list of dicts.

    The request is repeated, pausing one second between attempts, for as long
    as the server answers with HTTP 429 (Too Many Requests). Any other error
    status is raised by ``response.raise_for_status()``. The result is
    memoized by ``ttl_cache`` with a time-to-live of one day.

    :param url: the URL of a CSV file with a header row
    :returns: one dict per data row, as produced by ``csv.DictReader``
    """
    response = requests.get(url, timeout=10)
    while response.status_code == requests.codes.too_many_requests:
        logger.warning("HTTP 429 %s %s", url, response.headers)
        # time.sleep() is a blocking call: it would stall an IO loop, whereas an
        # asynchronous equivalent such as asyncio.sleep() would not.
        time.sleep(1)
        response = requests.get(url, timeout=10)

    response.raise_for_status()

    reader = csv.DictReader(StringIO(response.text))
    return list(reader)
def _codes(url: str, key: str) -> tuple[str, ...]:
    """
    Return the values of one column of the CSV file at ``url``, in row order.

    :param url: the URL of the CSV file, fetched via ``_get``
    :param key: the column heading whose values are collected
    :returns: the column's values, one per row
    """
    rows = _get(url)
    return tuple([record[key] for record in rows])
[docs]
def get_document_type_section_mapping() -> dict[str, list[str]]:
    """
    Map each document type code to the list of sections named in its row.

    The "Section" cell of each row is split on ``", "``. The returned mapping
    is a ``defaultdict(list)``, so looking up an unknown code yields an empty
    list.
    """
    url = "https://raw.githubusercontent.com/open-contracting/standard/1.1/schema/codelists/documentType.csv"
    sections_by_code = defaultdict(list)
    # If a code appears in more than one row, the last row wins.
    sections_by_code.update((record["Code"], record["Section"].split(", ")) for record in _get(url))
    return sections_by_code
[docs]
def get_identifier_scheme_codelist() -> tuple[str, ...]:
    """Return the values of the "code" column of the org-id.guide CSV download."""
    # NOTE(review): this is a plain-http URL; confirm whether an https endpoint can be used instead.
    url = "http://org-id.guide/download.csv"
    return _codes(url, "code")
[docs]
def get_language_codelist() -> tuple[str, ...]:
    """Return the values of the "Code" column of the standard's ``language.csv`` codelist (1.2-dev branch)."""
    codelist_url = "https://raw.githubusercontent.com/open-contracting/standard/1.2-dev/schema/codelists/language.csv"
    return _codes(url=codelist_url, key="Code")
[docs]
def get_ocid_prefix_codelist() -> tuple[str, ...]:
    """Return the values of the "OCID" column of the published Google Sheets CSV export."""
    # HTML view of the same sheet:
    # https://docs.google.com/spreadsheets/d/1E5ZVhc8VhGOakCq4GegvkyFYT974QQb-sSjvOfaxH7s/pubhtml?gid=506986894&single=true&widget=true
    csv_export_url = (
        "https://docs.google.com/spreadsheets/d/e/"
        "2PACX-1vQP8EwbUhsfxN7Fx7vX3mTA6Y8CXyGi04bHUepdcfxvM6VRVP9f5BWAYEG6MPbnJjWJp-La81DgG8wx"
        "/pub?gid=506986894&single=true&output=csv"
    )
    return _codes(csv_export_url, "OCID")