#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# collapse-checker.py: check a HTML file for too-small/unnecessarily 'collapsed' HTML blocks
# Author: Gwern Branwen
# Date: 2024-01-04
# When: Time-stamp: "2024-04-28 20:36:46 gwern"
# License: CC-0
#
# Usage: $ collapse-checker.py foo.html
#
# On Gwern.net, we make heavy use of 'collapses': code folding or disclosure-like regions which are shrunk by default,
# but the user can hover over or click to expand into the full section. This is useful for managing complexity and
# allowing readers to opt into levels of detail. (It is also useful for handling transclusions, as transcludes inside a collapse
# are lazy and will not transclude until the reader evaluates them.) But we can overuse collapses, especially by collapsing small items,
# like 1--2 item lists. (Especially common in annotations' "See Also" lists, where I've pruned them down to just a few relevant links,
# but then forgot to remove the 'collapse' class manually.) These are bad because they impose a lot of visual clutter
# (to denote a collapsed/uncollapsed region) and the reader has to think about them and activate them etc, which are burdens.
#
# So we use BeautifulSoup + heuristics to parse Gwern.net-style HTML files for collapsed blocks, try to guess if they are 'small' or 'large', and print out a warning if small.
#
# This is used in the site build process to check all essay & annotations.
#
# By default, it prints out the filename of files which have a bad-looking collapse. If the filename has percent signs in it, indicating that it is URL-encoded (eg. `/metadata/annotation/*.html` snippets), it is URL-decoded for easier reference/modification.
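## Some examples of 'too small' collapses:
# html_content1 = """
# <div class="collapse">
#   <ul>
#     <li>one</li>
#     <li>two</li>
#   </ul>
# </div>
# """
# html_content = "<div class='collapse'><p>foo</p><p>bar</p></div>"
# html_content = "<div class='collapse'><p>Foo foo bar.</p><p>bar</p></div>"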
from bs4 import BeautifulSoup, Tag
import sys
from urllib.parse import unquote
def print_filename(filename):
    """
    Prints a filename to stdout, URL-decoding any percent-escapes first.
    Args:
    - filename (str): The filename to report; passed through `urllib.parse.unquote` before printing.
    """
    print(unquote(filename))
def print_red(message):
    """
    Writes a message to stderr wrapped in ANSI color codes so it shows up red.
    Args:
    - message (str): The text to emit; a trailing newline is appended.
    """
    # bright-red escape sequence + message + color reset + newline
    colored_line = f"\033[91m{message}\033[0m\n"
    sys.stderr.write(colored_line)
def read_and_parse_html(filename):
    """
    Read HTML content from a file and return a BeautifulSoup object.
    Args:
    - filename (str): Path of the HTML file to read.
    Returns:
    - The BeautifulSoup tree (built with the "html.parser" backend), or None if the file
      could not be read or decoded; in that case the error is reported via print_red().
    """
    try:
        # Decode explicitly as UTF-8 instead of whatever the locale default happens to be,
        # so results do not vary by environment.
        # NOTE(review): assumes the input HTML is UTF-8 -- confirm.
        with open(filename, "r", encoding="utf-8") as file:
            html_content = file.read()
    except (OSError, UnicodeDecodeError) as e:
        # OSError covers the original IOError (an alias in Python 3); UnicodeDecodeError
        # is raised by read() on undecodable bytes and previously escaped as a traceback.
        print_red(f"Error reading file {filename}: {e}")
        return None
    # Parsing happens outside the try so only I/O and decoding errors are handled here.
    return BeautifulSoup(html_content, "html.parser")
def has_excluded_class(element, exclude_classes):
    """
    Report whether the element, or any ancestor reached via `.parent`, carries an excluded class.
    Args:
    - element: The starting node; must offer `.get("class", default)` and `.parent` (None ends the walk).
    - exclude_classes: Container of class names that trigger exclusion.
    Returns:
    - True on the first matching class found while walking upward, otherwise False.
    """
    node = element
    while node is not None:
        for css_class in node.get("class", []):
            if css_class in exclude_classes:
                return True
        node = node.parent
    return False
def check_for_incorrect_collapse_usage(soup, filename):
    """
    Check the BeautifulSoup object for incorrect .collapse usage and log findings.

    Every element with class "collapse" gets a heuristic 'content volume' score; if the score
    is 6 or less, the collapse is considered too small and `filename` is printed
    (once per offending element) via print_filename().
    Args:
    - soup: Parsed document as returned by read_and_parse_html(); None (a failed read) is ignored.
    - filename (str): Name to report for offending collapses.
    Returns:
    - None; findings are reported on stdout only.
    """
    if soup is None:
        # read_and_parse_html() returns None after reporting a read error; nothing to check.
        return

    collapse_elements = soup.find_all(class_="collapse")
    exclude_elements = ["h1", "h2", "h3", "h4", "h5", "h6", "section", "span", "code", "pre", "figure", "img", "a"]
    exclude_classes = {"backlinks-append", "similars-append", "link-bibliography-append", "aux-links-transclude-file"}  # we exclude .aux-links-append because 'See Also' have the most redundant collapses

    for element in collapse_elements:
        # Check if the element itself or any parent should be excluded based on classes
        if has_excluded_class(element, exclude_classes):
            continue

        # Skip if element itself or any inner element has excluded tags
        if element.name in exclude_elements or element.find(exclude_elements):
            continue

        # Initialize content volume assessment: one point per paragraph and per line break
        content_volume = 0
        content_volume += len(element.find_all("p")) + len(element.find_all("br"))

        # Add volume for figure and img tags due to their significant content contribution.
        # NOTE(review): "figure" and "img" are also in exclude_elements, so the guard above
        # already skips any element containing them and these two terms are currently always 0.
        content_volume += 3 * len(element.find_all("figure"))  # Arbitrarily weighting figures more
        content_volume += 2 * len(element.find_all("img"))  # Weighting images less because might be inline rather than big blocks

        # Direct child tags that are meaningful for content volume
        direct_children = [child for child in element.children if isinstance(child, Tag)]

        for child in direct_children:
            # Increment content volume for direct meaningful children
            if child.name not in ["ul", "ol", "div"]:
                content_volume += 1
            # For <ul>, <ol>, add the count of <li> items to content volume
            elif child.name in ["ul", "ol"]:
                content_volume += len(child.find_all("li"))
            # For <div> or similar containers, consider nested lists
            elif child.name == "div":
                nested_lists = child.find_all(["ul", "ol"])
                for lst in nested_lists:
                    content_volume += len(lst.find_all("li"))

        if content_volume <= 6:
            print_filename(filename)
            # print_red(element)
            # print_red("------------------------------------------------")
            # print_red(f"Incorrect use of '.collapse' class in {filename}!")
            # print_red("Debugging information... direct children elements: " + str(direct_children))
            # print_red("'.collapse' elements: " + str(collapse_elements))
            # print_red("------------------------------------------------")
if __name__ == "__main__":
    # At least one filename is mandatory, so the usage line names it explicitly.
    if len(sys.argv) < 2:
        print_red("Usage: python collapse-checker.py <filename1> [filename2] …")
        sys.exit(1)

    had_read_failure = False
    for filename in sys.argv[1:]:
        soup = read_and_parse_html(filename)
        if soup is None:
            # The read error was already reported; skip this file but keep checking the
            # rest instead of crashing on a None soup.
            had_read_failure = True
            continue
        check_for_incorrect_collapse_usage(soup, filename)

    # Preserve a non-zero exit status when any input could not be read.
    if had_read_failure:
        sys.exit(1)
He interrupted her. Close at hand is a stable where two beautiful ponies are kept. They are snowy white, and are consecrated to the goddess Ku-wanon, the deity of mercy, who is the presiding genius of the temple. They are in the care of a young girl, and it is considered a pious duty to feed them. Pease and beans are for sale outside, and many devotees contribute a few cash for the benefit of the sacred animals. If the poor beasts should eat a quarter of what is offered to them, or, rather, of what is paid for, they would soon die of overfeeding. It is shrewdly suspected that the grain is sold many times over, in consequence of a collusion between the dealers and the keeper of the horses. At all events, the health of the animals is regarded, and it would never do to give them all that is presented. On their return from the garden they stopped at a place where eggs are hatched by artificial heat. They are placed over brick ovens or furnaces, where a gentle heat is kept up, and a man is constantly on watch to see that the fire neither burns too rapidly nor too slowly. A great heat would kill the vitality of the egg by baking it, while if the temperature falls below a certain point, the hatching process does not go on. When the little chicks appear, they are placed under the care of an artificial mother, which consists of a bed of soft down and feathers, with a cover three or four inches above it. This cover has strips of down hanging from it, and touching the bed below, and the chickens nestle there quite safe from outside cold. The Chinese have practised this artificial hatching and rearing for thousands of years, and relieved the hens of a great deal of the monotony of life. He would not have it in the scabbard, and when I laid it naked in his hand he kissed the hilt. Charlotte sent Gholson for Ned Ferry. 
Glancing from the window, I noticed that for some better convenience our scouts had left the grove, and the prisoners had been marched in and huddled close to the veranda-steps, under their heavy marching-guard of Louisianians. One of the blue-coats called up to me softly: "Dying--really?" He turned to his fellows--"Boys, Captain's dying." Assuming an air of having forgotten all about Dick¡¯s rhyme, he went to his place in the seat behind Jeff and the instant his safety belt was snapped Jeff signaled to a farmer who had come over to investigate and satisfy himself that the airplane had legitimate business there; the farmer kicked the stones used as chocks from under the landing tires and Jeff opened up the throttle. ¡°Yes,¡± Dick supplemented Larry¡¯s new point. ¡°Another thing, Sandy, that doesn¡¯t explain why he¡¯d take three boys and fly a ship he could never use on water¡ªwith an amphibian right here.¡± Should you leave me too, O my faithless ladie? And years of remorse and despair been your fate, That night was a purging. From thenceforward Reuben was to press on straight to his goal, with no more slackenings or diversions. "Is that you, Robin?" said a soft voice; and a female face was seen peeping half way down the stairs. HoMElãñÔóÂÜÀ³ó
ENTER NUMBET 0016www.llkbsn.com.cn hfmedia.com.cn lz1zb.net.cn www.micao.net.cn fwoxxb.com.cn nrefs.org.cn www.mpchain.com.cn voleye.net.cn mlcygs.org.cn www.ruyugroup.com.cn