skip to Main Content

I need to convert a JSON file into a PDF. I’m having trouble creating a table in which items that are too long wrap automatically instead of overflowing to the right side.
Below I paste an example of the JSON that I need to convert to PDF, followed by my Python implementation (which unfortunately returns a bad result).

json code:

"allIssues":[
      {
         "ruleId":"name",
         "description":"Description123",
         "help":"Description234",
         "impact":"critical",
         "selector":[
            "abc1234"
         ],
         "summary":"long text",
         "source":"long text2",
      },
      {
       ...
      },
            ]

My python implementation:

import json
from fpdf import FPDF
import pandas as pd

# Load the inspection report; only its "allIssues" list is rendered below.
with open('input.json') as f:
    data = json.load(f)

pdf = FPDF()
pdf.add_page()

# Title
pdf.set_font('Arial', 'B', 16)
pdf.cell(0, 10, 'Inspection summary', 0, 1)

# Switch to the regular body font before measuring the row height.
pdf.set_font('Arial', '', 12)

# Keep only the fields that are printed, in display order.
df = pd.DataFrame(data['allIssues'])
df = df[['ruleId', 'description', 'help', 'impact', 'selector', 'summary', 'source']]

col_width = pdf.w / 2.2
row_height = pdf.font_size * 2

# One label/value listing per issue, separated by a horizontal rule.
for issue in df.itertuples(index=False):

    # Named `rows` so the loaded JSON in `data` is not overwritten.
    rows = [
        ['ruleId:', str(issue.ruleId)],
        ['description:', issue.description],
        ['help:', issue.help],
        ['impact:', issue.impact],
        ['selector:', issue.selector],
        ['summary:', issue.summary],
        ['source:', issue.source],
    ]

    # Draw table
    # NOTE(review): each multi_cell call appears to finish on a new line, so
    # the label and its value end up stacked instead of side by side --
    # confirm against the installed fpdf version's multi_cell documentation.
    for row in rows:
        pdf.multi_cell(col_width, row_height, str(row[0]), border=0)
        pdf.multi_cell(col_width, row_height, str(row[1]), border=0)
        pdf.ln(row_height)

    # Draw line between tables
    pdf.line(10, pdf.get_y(), pdf.w - 10, pdf.get_y())
    pdf.ln(row_height)

pdf.output('output.pdf', 'F')

This is a screenshot of the output:
output

This is what I’m trying to achieve
ref

Can you give me a hand? Is it feasible to create something nice?

2

Answers


  1. There seem to be a lot of steps in your code. You could simply loop over the columns of your transposed df and export each of them to HTML. Append all the HTML tables to a root html element and export it with pdfkit:

    import json
    import pandas as pd
    import lxml.etree as et
    import pdfkit
    
    your_json = """{"url": "https://www.abc123.com", "extensionVersion": "4.51.0", "axeVersion": "4.6.3", "standard": "WCAG 2.1 AA", "testingStartDate": "2023-04-03T09:35:06.177Z", "testingEndDate": "2023-04-03T09:35:06.177Z", "bestPracticesEnabled": false, "issueSummary": {"critical": 2, "moderate": 0, "minor": 0, "serious": 0, "bestPractices": 0, "needsReview": 0}, "remainingTestingSummary": {"run": false}, "igtSummary": [], "failedRules": [{"name": "button-name", "count": 1, "mode": "automated"}, {"name": "select-name", "count": 1, "mode": "automated"}], "needsReview": [], "allIssues": [{"ruleId": "button-name", "description": "Ensures buttons have discernible text", "help": "Buttons must have discernible text", "helpUrl": "https://www.abc123.com", "impact": "critical", "needsReview": false, "isManual": false, "selector": [".livechat-button"], "summary": "Fix any of the following:\n  Element does not have inner text that is visible to screen readers\n  aria-label attribute does not exist or is empty\n  aria-labelledby attribute does not exist, references elements that do not exist or references elements that are empty\n  Element has no title attribute\n  Element's default semantics were not overridden with role=\"none\" or role=\"presentation\"", "source": "<button class=\"livechat-button items-center bg-black shadow-liveChat rounded-full text-white p-2 h-12 transition-all opacity-0 pointer-events-none w-sp-48 opacity-0 pointer-events-none\">", "tags": ["cat.name-role-value", "wcag2a", "wcag412", "section508", "section508.22.a", "ACT"], "igt": "", "shareURL": "", "createdAt": "2023-04-03T09:35:06.177Z", "testUrl": "", "testPageTitle": "ABC123", "foundBy": "[email protected]", "axeVersion": "4.6.3"}, {"ruleId": "select-name", "description": "Ensures select element has an accessible name", "help": "Select element must have an accessible name", "helpUrl": "https://www.abc123.com", "impact": "critical", "needsReview": false, "isManual": false, "selector": 
["#plp__sortSelected"], "summary": "Fix any of the following:\n  Form element does not have an implicit (wrapped) <label>\n  Form element does not have an explicit <label>\n  aria-label attribute does not exist or is empty\n  aria-labelledby attribute does not exist, references elements that do not exist or references elements that are empty\n  Element has no title attribute\n  Element's default semantics were not overridden with role=\"none\" or role=\"presentation\"", "source": "<select class=\"w-full absolute opacity-0 appearance-none text-value-small font-bold text-black uppercase cursor-pointer bg-transparent outline-0\" id=\"plp__sortSelected\">", "tags": ["cat.forms", "wcag2a", "wcag412", "section508", "section508.22.n", "ACT"], "igt": "", "shareURL": "", "createdAt": "2023-04-03T09:35:06.177Z", "testUrl": "https://www.abc123.com", "testPageTitle": "ABC123", "foundBy": "[email protected]", "axeVersion": "4.6.3"}]}"""
    data = json.loads(your_json)
    
    ## replace the above lines with the following in your case
    # with open('your_file.json', 'r') as f:   
    #     data = json.load(f)
    
    # Root element that collects the header, the summary table and one
    # table per issue before the whole tree is rendered to PDF.
    html = et.Element("html")

    # general info
    # Build these nodes through the element API instead of parsing f-string
    # markup: values are escaped on serialization, so a URL containing
    # "&", "<" or '"' no longer makes the XML parser raise.
    site_heading = et.SubElement(html, "h3")
    site_heading.text = "Site link: "
    site_link = et.SubElement(site_heading, "a", href=data['url'])
    site_link.text = data['url']
    et.SubElement(html, "h4").text = f"Date: {data['testingEndDate']}"
    et.SubElement(html, "h4").text = "Summary:"

    # summary table
    summary = pd.Series(data['issueSummary'])
    summary_table = et.fromstring(summary.to_frame().to_html(header=False))
    summary_table.set('class', 'summary')
    html.append(summary_table)

    # issue tables
    # After the transpose each column is one issue and each row is one field,
    # so every issue is exported as its own two-column table.
    cols_of_interest = ['ruleId', 'description', 'help', 'impact', 'selector', 'summary', 'source']
    df = pd.DataFrame(data['allIssues'])[cols_of_interest].T
    for col in df.columns:
        table = et.fromstring(df[[col]].to_html(header=False))
        table.set('class', 'issue')
        html.append(table)
        et.SubElement(html, "br")

    pdfkit.from_string(et.tostring(html, encoding="unicode"), "./output.pdf", css='style.css')
    

    With the following css file:

    /* style.css */
    /* Stylesheet handed to pdfkit for the generated report. */

    /* Fall back to common sans-serif fonts when Liberation Sans is missing. */
    * {
        font-family: 'Liberation Sans', Arial, Helvetica, sans-serif;
    }

    /* Centre every table horizontally. */
    table {
        margin: 20px;
        margin-left: auto;
        margin-right: auto;
    }

    table.summary {
        width: 50%;
    }

    /* Issue tables: full width, no outer border. */
    table.issue {
        border: 0;
        width: 100%;
        border-collapse: collapse;
    }

    table.issue td,
    table.issue th {
        border: 0;
        text-align: left;
        padding: 5px;
    }

    /* Thin rule under each row of an issue table. */
    table.issue tr {
        border-bottom: 1px solid #dddddd;
    }
    

    You’ll get:

    enter image description here

    Edit: updated json with the data you provided + exporting additional data + improved css

    Note: you will need to install wkhtmltopdf and make sure that it is in your path.

    Edit2: limiting output to desired fields

    Login or Signup to reply.
  2. disclaimer: I am the author of borb, the library used in this answer.

    Assuming your data looks like this:

    data = [
          {
             "ruleId":"name",
             "description":"Description123",
             "help":"Description234",
             "impact":"critical",
             "selector":[
                "abc1234"
             ],
             "summary":"long text",
             "source":"long text2",
          },
    ]
    

    You can run the following code:

    from decimal import Decimal

    # PDF is needed for PDF.dumps at the end of the script.
    from borb.pdf import Document, Page, PageLayout, SingleColumnLayout, Paragraph, HexColor, Table, TableUtil, PDF

    # create empty document
    doc: Document = Document()

    # create empty page
    page: Page = Page()
    doc.add_page(page)

    # use a PageLayout to be able to add things easily
    layout: PageLayout = SingleColumnLayout(page)

    # generate a Table for each issue
    for i, issue in enumerate(data):

        # add a header (Paragraph)
        layout.add(Paragraph("Issue %d" % i, font_size=Decimal(20), font_color=HexColor("#B5F8FE")))

        # add a Table (using the convenient TableUtil class);
        # missing fields are shown as "N.A.", and the selector list is
        # stringified because every cell is given as text
        table: Table = TableUtil.from_2d_array([["Rule ID", issue.get("ruleId", "N.A.")],
                                                ["Description", issue.get("description", "N.A.")],
                                                ["Help", issue.get("help", "N.A.")],
                                                ["Impact", issue.get("impact", "N.A.")],
                                                ["Selector", str(issue.get("selector", []))],
                                                ["Summary", issue.get("summary", "N.A.")],
                                                ["Source", issue.get("source", "N.A.")],
                                                ], header_row=False, header_col=True, flexible_column_width=False)
        layout.add(table)

    # store the PDF
    with open("output.pdf", "wb") as fh:
        PDF.dumps(fh, doc)
    

    This generates the following PDF:

    enter image description here

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search