Optimising a query - PostgreSQL - XPATH

Questvba
April 26, 2024
112 views
1 vote
2 Answers

I have the query (PostgreSQL) which works correctly.

with xml_test as (
    select
'<!--Build-Version: 1.564-REVb37dd024a8-20220530T1418--><tns:Message xsi:schemaLocation="https://appprod.be/Trivia/documents/Docs/XSDs/v1/T013 T013_v1.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:tns="https://appprod.be/Trivia/documents/Docs/XSDs/v1/T013">
  <Header>
    <Agent>90015299</Agent>
    <SocialFund>299</SocialFund>
    <MessageId>T013</MessageId>
    <Timestamp RequestDate="2022-06-16T23:20:00" ProcessDate="2022-06-16T23:25:20" ResponseDate="2022-06-16T23:25:20"/>
    <Reference NRK="6e6806740a6ea06d7ca6ae7b31edeb8e" Requestor="90010299"/>
    <Result>OK</Result>
    <ReturnCode>ResponseOnly</ReturnCode>
  </Header>
  <Body>
    <Request>
      <TransmissionMethod>BULK</TransmissionMethod>
      <SocialFundCode>299</SocialFundCode>
      <FileNumber>9913894</FileNumber>
      <FiscalYear>2020</FiscalYear>
      <FiscalHouseHolds>
        <FiscalHouseHold>
          <Beneficiary>66090999038</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>65120999956</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
        <FiscalHouseHold>
          <Beneficiary>66090999038</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>65120999956</FiscalPartner>
            <FiscalPartner>94013099924</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
      </FiscalHouseHolds>
    </Request>
  </Body>
</tns:Message>' as x
)

select 
    file_number
    , (xpath('//Beneficiary/text()', fiscal_household::xml))[1]::text benef
    , array_to_string( (xpath('//FiscalPartner/text()', fiscal_household::xml)), ' - ') fiscal_partners
from (
        select
            (xpath('/tnsMessage/Body/Request/FileNumber/text()', replace(x, ':','')::xml))[1]::text file_number
        ,   unnest(xpath('/tnsMessage/Body/Request/FiscalHouseHolds/FiscalHouseHold', replace(x, ':','')::xml))::text fiscal_household
        from xml_test
        ) temp
;

It gives me the correct results, which are these.

file_number	benef	fiscal_partners
9913894	66090999038	65120999956
9913894	66090999038	65120999956 – 94013099924

You can see that I have to go through an intermediate step and perform two SELECTs. Is it possible to eliminate this intermediate step? And/or optimise this query?
Thanks for all the inspiration.

Answers

Chosen as BEST ANSWER

But if we increase the number of xml files, we'll also increase the number of responses, won't we?

with xml_test as (

(
select
'<!--Build-Version: 1.564-REVb37dd024a8-20220530T1418--><tns:Message xsi:schemaLocation="https://appprod.be/Trivia/documents/Docs/XSDs/v1/T013 T013_v1.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:tns="https://appprod.be/Trivia/documents/Docs/XSDs/v1/T013">
  <Header>
    <Agent>90015299</Agent>
    <SocialFund>299</SocialFund>
    <MessageId>T013</MessageId>
    <Timestamp RequestDate="2022-06-16T23:20:00" ProcessDate="2022-06-16T23:25:20" ResponseDate="2022-06-16T23:25:20"/>
    <Reference NRK="6e6806740a6ea06d7ca6ae7b31edeb8e" Requestor="90010299"/>
    <Result>OK</Result>
    <ReturnCode>ResponseOnly</ReturnCode>
  </Header>
  <Body>
    <Request>
      <TransmissionMethod>BULK</TransmissionMethod>
      <SocialFundCode>299</SocialFundCode>
      <FileNumber>9913894</FileNumber>
      <FiscalYear>2020</FiscalYear>
      <FiscalHouseHolds>
        <FiscalHouseHold>
          <Beneficiary>66090999038</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>65120999956</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
        <FiscalHouseHold>
          <Beneficiary>66090999038</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>65120999956</FiscalPartner>
            <FiscalPartner>94013099924</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
        <FiscalHouseHold>
          <Beneficiary>66090999040</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>65120990057</FiscalPartner>
            <FiscalPartner>94013000925</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
      </FiscalHouseHolds>
    </Request>
  </Body>
</tns:Message>' as x
)
UNION 
(
    select
'<!--Build-Version: 1.564-REVb37dd024a8-20220530T1418--><tns:Message xsi:schemaLocation="https://appprod.be/Trivia/documents/Docs/XSDs/v1/T013 T013_v1.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:tns="https://appprod.be/Trivia/documents/Docs/XSDs/v1/T013">
  <Header>
    <Agent>90015299</Agent>
    <SocialFund>299</SocialFund>
    <MessageId>T013</MessageId>
    <Timestamp RequestDate="2022-06-16T23:20:00" ProcessDate="2022-06-16T23:25:20" ResponseDate="2022-06-16T23:25:20"/>
    <Reference NRK="6e6806740a6ea06d7ca6ae7b31edeb8e" Requestor="90010299"/>
    <Result>OK</Result>
    <ReturnCode>ResponseOnly</ReturnCode>
  </Header>
  <Body>
    <Request>
      <TransmissionMethod>BULK</TransmissionMethod>
      <SocialFundCode>299</SocialFundCode>
      <FileNumber>8813894</FileNumber>
      <FiscalYear>2020</FiscalYear>
      <FiscalHouseHolds>
        <FiscalHouseHold>
          <Beneficiary>88090999038</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>88120999956</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
        <FiscalHouseHold>
          <Beneficiary>88090999038</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>88120999956</FiscalPartner>
            <FiscalPartner>94013099924</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
        <FiscalHouseHold>
          <Beneficiary>88090999040</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>88120990057</FiscalPartner>
            <FiscalPartner>88013000925</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
      </FiscalHouseHolds>
    </Request>
  </Body>
</tns:Message>' as y
)
)
        SELECT
            (xpath('/tnsMessage/Body/Request/FileNumber/text()', replace(x, ':','')::xml))[1]::text file_number
                , (xpath('//Beneficiary/text()', fiscal_household::xml))[1]::text AS benef
                , array_to_string( (xpath('//FiscalPartner/text()', fiscal_household::xml)), ' - ') fiscal_partners
        FROM xml_test, (SELECT unnest(xpath('/tnsMessage/Body/Request/FiscalHouseHolds/FiscalHouseHold', replace(x, ':','')::xml))::text AS fiscal_household FROM xml_test) A ;

(Edit)

Extract fiscal_household as a cartesian product (i.e. unconstrained join) subquery. This effectively acts as a row-level function that appends the expression to the source output.

Note that I added a new, third beneficiary with new fiscal_partners to demonstrate that referential integrity is maintained.

with xml_test as (
    select
'<!--Build-Version: 1.564-REVb37dd024a8-20220530T1418--><tns:Message xsi:schemaLocation="https://appprod.be/Trivia/documents/Docs/XSDs/v1/T013 T013_v1.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:tns="https://appprod.be/Trivia/documents/Docs/XSDs/v1/T013">
  <Header>
    <Agent>90015299</Agent>
    <SocialFund>299</SocialFund>
    <MessageId>T013</MessageId>
    <Timestamp RequestDate="2022-06-16T23:20:00" ProcessDate="2022-06-16T23:25:20" ResponseDate="2022-06-16T23:25:20"/>
    <Reference NRK="6e6806740a6ea06d7ca6ae7b31edeb8e" Requestor="90010299"/>
    <Result>OK</Result>
    <ReturnCode>ResponseOnly</ReturnCode>
  </Header>
  <Body>
    <Request>
      <TransmissionMethod>BULK</TransmissionMethod>
      <SocialFundCode>299</SocialFundCode>
      <FileNumber>9913894</FileNumber>
      <FiscalYear>2020</FiscalYear>
      <FiscalHouseHolds>
        <FiscalHouseHold>
          <Beneficiary>66090999038</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>65120999956</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
        <FiscalHouseHold>
          <Beneficiary>66090999038</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>65120999956</FiscalPartner>
            <FiscalPartner>94013099924</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
        <FiscalHouseHold>
          <Beneficiary>66090999040</Beneficiary>
          <FiscalPartners>
            <FiscalPartner>65120990056</FiscalPartner>
            <FiscalPartner>94013000924</FiscalPartner>
          </FiscalPartners>
        </FiscalHouseHold>
      </FiscalHouseHolds>
    </Request>
  </Body>
</tns:Message>' as x
)
        SELECT
            (xpath('/tnsMessage/Body/Request/FileNumber/text()', replace(x, ':','')::xml))[1]::text file_number
                , (xpath('//Beneficiary/text()', fiscal_household::xml))[1]::text AS benef
                , array_to_string( (xpath('//FiscalPartner/text()', fiscal_household::xml)), ' - ') fiscal_partners
        FROM xml_test, (SELECT unnest(xpath('/tnsMessage/Body/Request/FiscalHouseHolds/FiscalHouseHold', replace(x, ':','')::xml))::text AS fiscal_household FROM xml_test) A ;

yields

file_number	benef	fiscal_partners
9913894	66090999038	65120999956
9913894	66090999038	65120999956 – 94013099924
9913894	66090999040	65120990056 – 94013000924

Important to note that just because this may be syntactically cleaner as a 2-stage query vs 3-stage, the query optimizer may still ultimately build the same plan.

Please signup or login to give your own answer.

Click here to cancel reply.

Optimising a query – PostgreSQL – XPATH

Answers