skip to Main Content

The API I am using only returns a max of 1000 records per request, so I need to change the range and keep requesting until all records are returned. I am trying to keep the parent node and merge the json data under the "StudyFields" node. Just having the data under StudyFields would work also.

API URL (example):

https://classic.clinicaltrials.gov/api/query/study_fields?expr=aspirin&fields=InterventionName,NCTId,BriefTitle,InterventionType,InterventionDescription,InterventionOtherName,OverallStatus,LastUpdateSubmitDate&min_rnk=1&max_rnk=1000

Data format:

{
  "StudyFieldsResponse":{
    "APIVrs":"1.01.05",
    "DataVrs":"2023:09:07 00:28:48.692",
    "Expression":"aspirin",
    "NStudiesAvail":465100,
    "NStudiesFound":2548,
    "MinRank":1,
    "MaxRank":1000,
    "NStudiesReturned":1000,
    "FieldList":[
      "InterventionName",
      "NCTId",
      "BriefTitle",
      "InterventionType",
      "InterventionDescription",
      "InterventionOtherName",
      "OverallStatus",
      "LastUpdateSubmitDate"
    ],
    "StudyFields":[
      {
        "Rank":1,
        "InterventionName":[
          "Aspirin",
          "blood sample"
        ],
        "NCTId":[
          "NCT01375400"
        ]
      } 
    ]
  }
}

What I am getting (multiple parent nodes):

  {
  "StudyFieldsResponse":{
    "APIVrs":"1.01.05",
    "DataVrs":"2023:09:07 00:28:48.692",
    "Expression":"aspirin",
    "NStudiesAvail":465100,
    "NStudiesFound":2548,
    "MinRank":1,
    "MaxRank":1000,
    "NStudiesReturned":1000,
    "FieldList":[
      "InterventionName",
      "NCTId",
      "BriefTitle",
      "InterventionType",
      "InterventionDescription",
      "InterventionOtherName",
      "OverallStatus",
      "LastUpdateSubmitDate"
    ],
    "StudyFields":[
      {
        "Rank":1,
        "InterventionName":[
          "Aspirin",
          "blood sample"
        ],
        "NCTId":[
          "NCT01375400"
        ]
      }
     ]
    }
}
{
  "StudyFieldsResponse":{
    "APIVrs":"1.01.05",
    "DataVrs":"2023:09:07 00:28:48.692",
    "Expression":"aspirin",
    "NStudiesAvail":465100,
    "NStudiesFound":2548,
    "MinRank":1001,
    "MaxRank":2000,
    "NStudiesReturned":1000,
    "FieldList":[
      "InterventionName",
      "NCTId",
      "BriefTitle",
      "InterventionType",
      "InterventionDescription",
      "InterventionOtherName",
      "OverallStatus",
      "LastUpdateSubmitDate"
    ],
    "StudyFields":[
      {
        "Rank":1001,
        "InterventionName":[
          "Naoxintong Capsule",
          "Placebo"
        ],
        "NCTId":[
          "NCT05278182"
        ]
      }
   ]
  }
}

What I want (single parent, multiple children under the StudyFields node):

  {
  "StudyFieldsResponse":{
    "APIVrs":"1.01.05",
    "DataVrs":"2023:09:07 00:28:48.692",
    "Expression":"aspirin",
    "NStudiesAvail":465100,
    "NStudiesFound":2548,
    "MinRank":1,
    "MaxRank":1000,
    "NStudiesReturned":1000,
    "FieldList":[
      "InterventionName",
      "NCTId",
      "BriefTitle",
      "InterventionType",
      "InterventionDescription",
      "InterventionOtherName",
      "OverallStatus",
      "LastUpdateSubmitDate"
    ],
    "StudyFields":[
      {
        "Rank":1,
        "InterventionName":[
          "Aspirin",
          "blood sample"
        ],
        "NCTId":[
          "NCT01375400"
        ]
       },
      {
        "Rank":1001,
        "InterventionName":[
          "Naoxintong Capsule",
          "Placebo"
        ],
        "NCTId":[
          "NCT05278182"
        ]
      }
     ]
    }
}

Here is my code:

// Requests the search results in windows of 1000 ranks and writes everything
// that was collected to a JSON file.
// $NStudiesFound, $input_result and $fields are set outside this snippet.
$max_rnk=1000;
// $i is the first rank of each request; it advances by 1000 per iteration.
for ($i=1; $i<=$NStudiesFound; $i=$i+1000){
        
 $url1= 'https://classic.clinicaltrials.gov/api/query/study_fields?expr='.$input_result;
 $url1.='&fields='.$fields;
 $url1.='&min_rnk='.$i.'&max_rnk='.$max_rnk.'&fmt=json'; 
        
 // Appends the whole decoded response as a new element, so $data ends up as
 // a list of complete responses (one parent node per request).
 $data[]=json_decode(file_get_contents($url1),true);    

 // Move the upper bound along with $i for the next request.
 $max_rnk=$max_rnk+1000;

}

$file = 'intervention4.json';
// Encodes everything collected in $data and writes it to $file.
$data_merge = json_encode($data);
file_put_contents($file, $data_merge);

3

Answers


  1. Use array_merge() to concatenate the arrays rather than pushing each response into a new element of the array.

    // Collects the study records of every page into one flat list and writes
    // that list to a JSON file.
    // $NStudiesFound, $input_result and $fields are set outside this snippet.
    $data = [];
    $max_rnk=1000;
    for ($i=1; $i<=$NStudiesFound; $i=$i+1000){

        $url1= 'https://classic.clinicaltrials.gov/api/query/study_fields?expr='.$input_result;
        $url1.='&fields='.$fields;
        $url1.='&min_rnk='.$i.'&max_rnk='.$max_rnk.'&fmt=json';

        $page = json_decode(file_get_contents($url1), true);

        // Merging the decoded responses themselves would not concatenate
        // anything: every response has the same string key
        // 'StudyFieldsResponse', and array_merge() lets the later value
        // overwrite the earlier one for string keys, leaving only the last
        // page. The StudyFields lists are numerically indexed, so merging
        // those appends the records instead.
        $data = array_merge($data, $page['StudyFieldsResponse']['StudyFields'] ?? []);

        $max_rnk=$max_rnk+1000;
    }

    $file = 'intervention4.json';
    $data_merge = json_encode($data);
    file_put_contents($file, $data_merge);
    
    Login or Signup to reply.
  2. If you just want to aggregate things under the ‘StudyFields’ key, then you can use something like this:

    // Keeps the first response as the parent structure and appends the
    // StudyFields records of every later response to it.
    // $NStudiesFound, $input_result and $fields are set outside this snippet.
    $combinedData = [];
    $i = 1;
    $max_rnk = 1000;
    while ($i <= $NStudiesFound) {
        $url1 = 'https://classic.clinicaltrials.gov/api/query/study_fields?expr=' . $input_result
            . '&fields=' . $fields
            . '&min_rnk=' . $i . '&max_rnk=' . $max_rnk . '&fmt=json';

        $data = json_decode(file_get_contents($url1), true);

        if (!$combinedData) {
            // Nothing collected yet: this response becomes the parent node.
            $combinedData = $data;
        } else {
            // Later pages only contribute their study records.
            $merged = array_merge(
                $combinedData['StudyFieldsResponse']['StudyFields'],
                $data['StudyFieldsResponse']['StudyFields']
            );
            $combinedData['StudyFieldsResponse']['StudyFields'] = $merged;
        }

        // Slide the rank window forward by one page.
        $i = $i + 1000;
        $max_rnk = $max_rnk + 1000;
    }

    $file = 'intervention4.json';
    $data_merge = json_encode($combinedData);
    file_put_contents($file, $data_merge);
    
    Login or Signup to reply.
  3. Simply dumping all the result sets together into one gigantic array is going to be problematic as your data set grows. This is the perfect place to implement a generator. Make an API hit to grab 1000 results, yield them individually, and then repeat until you’re out of records. This way, you process as you go, and you only ever use 1000 records’ worth of resources.

    /**
     * Iterates over the study records of a study_fields query, requesting one
     * window of 1000 ranks at a time and yielding the records one by one.
     */
    class Trials implements IteratorAggregate
    {
        /**
         * Yields every record found under StudyFieldsResponse.StudyFields,
         * page after page.
         *
         * Keys restart at 0 on each page because "yield from" preserves the
         * keys of the inner array; pass false as the second argument of
         * iterator_to_array() if you collect the results into an array.
         *
         * @return Generator
         * @throws RuntimeException when a page cannot be fetched or does not
         *                          contain a StudyFieldsResponse object
         */
        public function getIterator(): Generator
        {
            $min = 1;
            $size = 1000;
            $url = 'https://classic.clinicaltrials.gov/api/query/study_fields';
            do {
                $max = $min + $size - 1;
                $params = http_build_query([
                    'expr' => 'aspirin',
                    // NOTE(review): the trailing '...' looks like a placeholder
                    // for the remaining field names - replace before running.
                    'fields' => 'InterventionName,NCTId,BriefTitle,...',
                    'min_rnk' => $min,
                    'max_rnk' => $max,
                    'fmt' => 'json',
                ]);

                $body = file_get_contents($url . '?' . $params);
                if ($body === false) {
                    throw new RuntimeException(sprintf('Request for ranks %d-%d failed', $min, $max));
                }

                $data = json_decode($body, true);
                $response = is_array($data) ? ($data['StudyFieldsResponse'] ?? null) : null;
                if (!is_array($response)) {
                    throw new RuntimeException(sprintf('Unexpected response for ranks %d-%d', $min, $max));
                }

                // "yield from null" is a fatal error, so fall back to an empty
                // list when the response carries no StudyFields key.
                yield from $response['StudyFields'] ?? [];
                $min += $size;

                // A missing MaxRank must end the loop: comparing null with a
                // positive integer evaluates to true and would never stop.
            } while (($response['MaxRank'] ?? PHP_INT_MAX) < ($response['NStudiesFound'] ?? 0));
        }
    }
    

    Then all you need to do is iterate over an instance of the object itself, and you’ll transparently get each record individually:

    // Print the rank of every record, one per line.
    foreach (new Trials() as $trial) {
        printf("Rank %d\n", $trial['Rank']);
    }
    

    Even better, create a constructor so you can pass the other arguments, and make the field list a default:

    /**
     * Iterates over the study records of a study_fields query for a given
     * search expression, requesting one window of 1000 ranks at a time and
     * yielding the records one by one.
     */
    class Trials implements IteratorAggregate
    {
        /** Field names requested when the caller does not supply any. */
        public const DEFAULT_FIELDS = [
            'InterventionName',
            'NCTId',
            'BriefTitle',
            'InterventionType',
            'InterventionDescription',
            'InterventionOtherName',
            'OverallStatus',
            'LastUpdateSubmitDate',
        ];

        /** Search expression sent as the "expr" query parameter. */
        private string $expr;

        /** Field names sent, comma-separated, as the "fields" query parameter. */
        private array $fields;

        /**
         * @param string $expr   search expression
         * @param array  $fields field names to request; an empty array selects
         *                       DEFAULT_FIELDS
         */
        public function __construct(string $expr, array $fields = [])
        {
            $this->expr = $expr;
            $this->fields = $fields ?: self::DEFAULT_FIELDS;
        }

        /**
         * Yields every record found under StudyFieldsResponse.StudyFields,
         * page after page.
         *
         * Keys restart at 0 on each page because "yield from" preserves the
         * keys of the inner array; pass false as the second argument of
         * iterator_to_array() if you collect the results into an array.
         *
         * @return Generator
         * @throws RuntimeException when a page cannot be fetched or does not
         *                          contain a StudyFieldsResponse object
         */
        public function getIterator(): Generator
        {
            $min = 1;
            $size = 1000;
            $url = 'https://classic.clinicaltrials.gov/api/query/study_fields';
            do {
                $max = $min + $size - 1;
                $params = http_build_query([
                    'expr' => $this->expr,
                    'fields' => implode(',', $this->fields),
                    'fmt' => 'json',
                    'min_rnk' => $min,
                    'max_rnk' => $max,
                ]);

                $body = file_get_contents($url . '?' . $params);
                if ($body === false) {
                    throw new RuntimeException(sprintf('Request for ranks %d-%d failed', $min, $max));
                }

                $data = json_decode($body, true);
                $response = is_array($data) ? ($data['StudyFieldsResponse'] ?? null) : null;
                if (!is_array($response)) {
                    throw new RuntimeException(sprintf('Unexpected response for ranks %d-%d', $min, $max));
                }

                // "yield from null" is a fatal error, so fall back to an empty
                // list when the response carries no StudyFields key.
                yield from $response['StudyFields'] ?? [];
                $min += $size;

                // A missing MaxRank must end the loop: comparing null with a
                // positive integer evaluates to true and would never stop.
            } while (($response['MaxRank'] ?? PHP_INT_MAX) < ($response['NStudiesFound'] ?? 0));
        }
    }
    

    Now you can pass your search field as the argument:

    // Print the rank and the first NCT id of every record, one per line.
    foreach (new Trials('aspirin') as $trial) {
        printf("%4d %s\n", $trial['Rank'], $trial['NCTId'][0]);
    }
    

    And if you want to override the default fields returned, you can do:

    // Same output, but only the two listed fields are requested from the API.
    foreach (new Trials('lipitor', ['BriefTitle', 'NCTId']) as $trial) {
        printf("%4d %s\n", $trial['Rank'], $trial['NCTId'][0]);
    }
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search