Tweak URL parsing
This commit is contained in:
parent
1e6a290fb3
commit
4f98ff6bbf
6 changed files with 12 additions and 10 deletions
|
@ -24,7 +24,7 @@ def _get_prop(props: dict[str, Any], name: str, default=None) -> Any:
|
|||
|
||||
async def get_client_id_data(url: str) -> IndieAuthClient | None:
|
||||
# Don't fetch localhost URL
|
||||
if urlparse(url).netloc == "localhost":
|
||||
if urlparse(url).hostname == "localhost":
|
||||
return IndieAuthClient(
|
||||
logo=None,
|
||||
name=url,
|
||||
|
|
|
@ -39,7 +39,7 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
|
|||
"title": soup.find("title").text,
|
||||
"image": None,
|
||||
"description": None,
|
||||
"site_name": urlparse(url).netloc,
|
||||
"site_name": urlparse(url).hostname,
|
||||
}
|
||||
for field in OpenGraphMeta.__fields__.keys():
|
||||
og_field = f"og:{field}"
|
||||
|
@ -60,7 +60,7 @@ async def external_urls(
|
|||
db_session: AsyncSession,
|
||||
ro: ap_object.RemoteObject | OutboxObject | InboxObject,
|
||||
) -> set[str]:
|
||||
note_host = urlparse(ro.ap_id).netloc
|
||||
note_host = urlparse(ro.ap_id).hostname
|
||||
|
||||
tags_hrefs = set()
|
||||
for tag in ro.tags:
|
||||
|
@ -84,7 +84,7 @@ async def external_urls(
|
|||
mimetype, _ = mimetypes.guess_type(h)
|
||||
if (
|
||||
ph.scheme in {"http", "https"}
|
||||
and ph.netloc != note_host
|
||||
and ph.hostname != note_host
|
||||
and is_url_valid(h)
|
||||
and (
|
||||
not mimetype
|
||||
|
|
|
@ -27,11 +27,13 @@ def replace_url(u: str) -> str:
|
|||
|
||||
try:
|
||||
parsed_href = urlparse(u)
|
||||
if not parsed_href.hostname:
|
||||
raise ValueError("Missing hostname")
|
||||
except Exception:
|
||||
logger.warning(f"Failed to parse url={u}")
|
||||
return u
|
||||
|
||||
if new_netloc := PRIVACY_REPLACE.get(parsed_href.netloc.removeprefix("www.")):
|
||||
if new_netloc := PRIVACY_REPLACE.get(parsed_href.hostname.removeprefix("www.")):
|
||||
return parsed_href._replace(netloc=new_netloc).geturl()
|
||||
|
||||
return u
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue