import tld
import re
import dateutil.parser as p
def get_domain_base(url):
    """Return the base domain of *url* as ``"<domain>.<tld>"``.

    The URL is parsed with ``tld.get_tld(..., as_object=True)`` and the
    result's ``domain`` and ``tld`` attributes are joined with a dot.

    This is deliberately best-effort: if the URL cannot be parsed (or the
    result cannot be formatted), the input is returned unchanged instead of
    raising.

    Args:
        url: The URL to extract the base domain from.

    Returns:
        The ``"<domain>.<tld>"`` string, or *url* itself when extraction fails.
    """
    try:
        res = tld.get_tld(url, as_object=True)
        return f"{res.domain}.{res.tld}"
    except Exception:
        # Fall back to the raw input on any ordinary failure. Catching
        # Exception (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return url
def get_indented_text(text):
    """Prefix *text* and every line following a newline with ``"> "``.

    An empty string yields just the prefix, and a trailing newline is
    followed by a prefix-only final line.
    """
    marker = "> "
    # Prefix the start of the string, then re-insert the marker after
    # each newline so every line carries it.
    return marker + text.replace("\n", "\n" + marker)
def get_date(timestamp, date_format):
    """Parse *timestamp* and render it using *date_format*.

    Args:
        timestamp: A date/time string handed to ``dateutil.parser.parse``.
        date_format: A ``strftime`` format string applied to the parsed value.

    Returns:
        The formatted date string.
    """
    return p.parse(timestamp).strftime(date_format)
def handler(pd: "pipedream"):
    """Build the derived fields for a highlight from earlier step outputs.

    Reads the book record from ``pd.steps["book"]["$return_value"]`` and the
    highlight from ``pd.steps["trigger"]["event"]``, then derives:

    * ``formatted_date`` - the book's ``"updated"`` value formatted as
      ``%d.%m.%Y``;
    * ``domain_base`` - the base domain of the book's ``"source_url"``;
    * ``indented_text`` - the highlight's ``"text"`` with each line prefixed
      by ``"> "``.

    Returns:
        A dict with the three keys above, for use in later steps.
    """
    # Inputs produced by earlier workflow steps.
    book_record = pd.steps["book"]["$return_value"]
    highlight_event = pd.steps["trigger"]["event"]

    # Derived values, computed in the same order as the fields are read.
    base_domain = get_domain_base(book_record["source_url"])
    quoted_text = get_indented_text(highlight_event["text"])
    updated_on = get_date(book_record["updated"], "%d.%m.%Y")

    return dict(
        formatted_date=updated_on,
        domain_base=base_domain,
        indented_text=quoted_text,
    )