def render_pdf(d, path):
    """Draw a realistic 3-page report and write it to ``path``.

    Page breaks are forced so the headline metric on page 1 (abstract) is
    physically separated from the results table on page 3.

    Layout:
        * Page 1: title, author line, abstract, keywords.
        * Page 2: method/training text, Table 1 (hyperparameters), datasets.
        * Page 3: Table 2 (results), limitations, funding note.

    Args:
        d: Mapping describing one report. Keys read here: ``title``,
            ``authors`` (iterable of 2-item pairs, rendered as
            ``first (second)``), ``method``, ``task``, ``primary_benchmark``,
            ``metric_name``, ``test_acc``, ``val_acc``, ``prior_best``,
            ``beats_sota`` (truthiness picks the abstract wording),
            ``params_m``, ``datasets`` (sized iterable of strings),
            ``optimizer``, ``lr``, ``batch``, ``epochs``, ``baseline_name``,
            ``baseline_val``, ``baseline_test``, ``limitations`` (iterable of
            strings) and ``funding_note``.
        path: Output file name handed to ``SimpleDocTemplate``.

    Raises:
        KeyError: If ``d`` lacks any of the keys listed above.
    """
    # reportlab is imported lazily so the dependency is only needed when a
    # PDF is actually rendered.
    from reportlab.lib.pagesizes import LETTER
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.platypus import (SimpleDocTemplate, Paragraph, Spacer,
                                    Table, TableStyle, PageBreak)

    def table_style(header_hex, stripe_hex, extra=()):
        """Return the shared header/grid/zebra style.

        ``extra`` commands are spliced in right after GRID so the command
        order matches what each table originally declared.
        """
        return TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor(header_hex)),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTSIZE", (0, 0), (-1, -1), 9.5),
            ("GRID", (0, 0), (-1, -1), 0.4, colors.grey),
            *extra,
            ("ROWBACKGROUNDS", (0, 1), (-1, -1),
             [colors.white, colors.HexColor(stripe_hex)]),
            ("LEFTPADDING", (0, 0), (-1, -1), 8),
            ("TOPPADDING", (0, 0), (-1, -1), 4),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
        ])

    ss = getSampleStyleSheet()
    H1 = ParagraphStyle("H1", parent=ss["Title"], fontSize=16, leading=20, spaceAfter=6)
    AUTH = ParagraphStyle("AUTH", parent=ss["Normal"], fontSize=9.5,
                          textColor=colors.grey, spaceAfter=10)
    H2 = ParagraphStyle("H2", parent=ss["Heading2"], fontSize=12, spaceBefore=8, spaceAfter=4)
    BODY = ParagraphStyle("BODY", parent=ss["Normal"], fontSize=10, leading=14, spaceAfter=6)

    # NOTE(review): Paragraph parses its text as inline markup, so values in
    # ``d`` containing "<" or "&" may need escaping -- confirm against the
    # data this is fed with.
    sota_phrase = (f"surpassing the previous best of {d['prior_best']}"
                   if d["beats_sota"] else
                   f"approaching but not exceeding the previous best of {d['prior_best']}")
    authors_line = ", ".join(f"{n} ({a})" for (n, a) in d["authors"])
    datasets_csv = ", ".join(d["datasets"])

    # ---- Page 1: title block, abstract, keywords -------------------------
    story = [
        Paragraph(d["title"], H1),
        Paragraph(authors_line, AUTH),
        Paragraph("Abstract", H2),
        Paragraph(
            f"We introduce {d['method']}, a model for {d['task']}. On the {d['primary_benchmark']} "
            f"benchmark, {d['method']} attains {d['test_acc']} {d['metric_name']} on the held-out "
            f"test set, {sota_phrase}. Our {d['params_m']}M-parameter model is evaluated across "
            f"{len(d['datasets'])} datasets ({datasets_csv}). "
            f"Extensive ablations confirm the contribution of each component.", BODY),
        Paragraph("Keywords", H2),
        Paragraph(f"{d['task']}; representation learning; {d['primary_benchmark']}", BODY),
        PageBreak(),
    ]

    # ---- Page 2: method, hyperparameter table, datasets ------------------
    hp = [["Hyperparameter", "Value"],
          ["Optimizer", d["optimizer"]],
          ["Learning rate", str(d["lr"])],
          ["Batch size", str(d["batch"])],
          ["Epochs", str(d["epochs"])],
          ["Parameters", f"{d['params_m']}M"]]
    t1 = Table(hp, colWidths=[2.4 * inch, 2.0 * inch])
    t1.setStyle(table_style("#2b3a67", "#eef1f8"))
    story.extend([
        Paragraph("1 Method and Training Details", H2),
        Paragraph(
            f"{d['method']} is trained end-to-end with the {d['optimizer']} optimizer. "
            f"We tune on a validation split and report final numbers on the test split. "
            f"The full training configuration is summarized in Table 1.", BODY),
        Spacer(1, 4), t1, Spacer(1, 6),
        Paragraph("Table 1. Training configuration.", BODY),
        Paragraph("2 Datasets", H2),
        Paragraph(
            f"We evaluate on {datasets_csv}. {d['primary_benchmark']} is our "
            f"primary benchmark; the remaining datasets are used for generalization "
            f"studies.", BODY),
        PageBreak(),
    ])

    # ---- Page 3: results table, limitations, funding ---------------------
    res = [["Method", f"Val. {d['metric_name']}", f"Test {d['metric_name']}"],
           [f"{d['baseline_name']} (baseline)", str(d["baseline_val"]), str(d["baseline_test"])],
           [f"{d['method']} (ours)", str(d["val_acc"]), str(d["test_acc"])]]
    t2 = Table(res, colWidths=[2.6 * inch, 1.7 * inch, 1.7 * inch])
    # Row 2 (the "ours" row) is always the one set in bold.
    t2.setStyle(table_style(
        "#7a2e2e", "#f7eeee",
        extra=[("FONTNAME", (0, 2), (-1, 2), "Helvetica-Bold")]))
    story.extend([
        Paragraph("3 Results", H2),
        Spacer(1, 4), t2, Spacer(1, 6),
        Paragraph(f"Table 2. Results on {d['primary_benchmark']}. "
                  f"Best test result in bold.", BODY),
        Paragraph("4 Limitations", H2),
    ])
    story.extend(Paragraph("• " + lim, BODY) for lim in d["limitations"])
    story.extend([
        Paragraph("5 Funding and Code Availability", H2),
        Paragraph(d["funding_note"], BODY),
    ])

    SimpleDocTemplate(path, pagesize=LETTER,
                      topMargin=0.8 * inch, bottomMargin=0.8 * inch,
                      leftMargin=0.9 * inch, rightMargin=0.9 * inch).build(story)
# Step 3: render one PDF per entry in DOCS and remember, for each, the source
# spec, its ground truth and the file it was written to.
print("STEP 3/7 · Generating synthetic report PDFs…")
# Write into /content when that directory exists, else the working directory.
# The check is loop-invariant, so it is done once up front.
out_prefix = "/content/" if os.path.isdir("/content") else ""
CORPUS = []
for i, d in enumerate(DOCS):
    path = f"{out_prefix}report_{i}.pdf"
    render_pdf(d, path)
    CORPUS.append((d, ground_truth(d), path))
    print(f" ✓ {os.path.basename(path)} — {d['method']}")
print()

if SHOW_FIRST_PAGE:
    # Best-effort preview of page 1 of the first report. Any failure (missing
    # optional packages, empty corpus, no display backend) is reported and
    # skipped rather than aborting the run.
    try:
        import pypdfium2 as pdfium
        import matplotlib.pyplot as plt

        pg = pdfium.PdfDocument(CORPUS[0][2])[0]
        img = pg.render(scale=2.0).to_pil()
        plt.figure(figsize=(6.4, 8.3))
        plt.imshow(img)
        plt.axis("off")
        plt.title("What lift reads — page 1 of report_0.pdf", fontsize=10)
        plt.show()
    except Exception as e:
        print(" (page preview skipped:", e, ")\n")
Trending
- Using Lift to Turn Research PDFs into Structured JSON with Controlled, Schema-Guided Field-Level Evaluation
- Why Powerful ML Is Deceptively Easy — Part 2
- Deploying retail AI to scale personalisation and customer insight
- CUP (Common Useful Python): Building Reliable Python Workflows with Baidu’s Utility Toolkit
- What Can We Do When Memory Becomes the New Bottleneck in Data Engineering?
- Japan Bets on AI Robots to Solve a Worker Shortage
- Google AI Introduces TabFM: A Hybrid-Attention Tabular Foundation Model for Zero-Shot Classification and Regression
- Can a 10-second finger workout help keep your brain healthy?
Previous Article: Why Powerful ML Is Deceptively Easy — Part 2
Related Posts
Add A Comment
Subscribe to Updates
Get the latest creative news from FooBar about art, design and business.
© 2026 insureai360. Designed by Pro.
