skip to Main Content

I need to sort a text file by date. I want to load its lines into a vector and then sort that vector. I wrote a function that opens the file and stores all the lines in the vector "contents". I'm using Visual Studio Code.

The code I got:

#include <iostream>
#include <fstream>
#include <string>
#include <stdio.h>
#include <vector>
using namespace std;


// Reads "searches.txt" line by line into a vector and echoes every stored
// line to standard output.
//
// Returns 0 on success, or 1 if the file could not be opened (a message is
// written to standard error in that case).
int readFile(){
    const std::string filename = "searches.txt";

    std::ifstream file(filename);
    if (!file) {
        // A stream that failed to open never reports eof(), so looping on
        // eof() would spin forever; bail out explicitly instead.
        std::cerr << "Could not open " << filename << '\n';
        return 1;
    }

    std::vector<std::string> contents;
    std::string line;

    // Loop on the result of getline itself: testing eof() *before* reading
    // stores one extra empty string after the last line of the file.
    while (std::getline(file, line)) {
        contents.push_back(line);
    }

    // Bind by const reference so printing does not copy each string.
    for (const auto& file_line : contents)
        std::cout << file_line << '\n';

    // The ifstream destructor closes the file when it goes out of scope.
    return 0;
}


// Program entry point: runs readFile() and uses its return value as the
// process exit status instead of discarding it.
int main(){
    return readFile();
}

The actual output looks like this:

Oct 9 10:32:24 423.2.230.77:6166 Failed password for illegal user guest
Aug 28 23:07:49 897.53.984.6:6710 Failed password for root
Aug 4 03:18:56 960.96.3.29:5268 Failed password for admin
Jun 20 13:39:21 118.15.416.57:4486 Failed password for illegal user guest
Jun 2 18:37:41 108.57.27.85:5491 Failed password for illegal user guest
Oct 1 07:22:46 450.25.888.72:5978 Illegal user
Sep 14 10:01:55 694.78.591.36:5537 Failed password for illegal user guest
Jun 16 22:09:01 84.88.502.53:6452 Failed password for illegal user test
Sep 24 17:22:12 311.49.840.89:4145 Failed password for illegal user guest
Jul 12 20:17:38 390.18.339.91:6610 Failed password for root
Jul 16 03:38:22 990.87.715.21:6558 Failed password for admir/
Oct 26 00:53:10 505.13.173.18:4083 Failed password for illegal user guest

How can I sort the vector's contents by date?

2

Answers


  1. It is rarely a good (or easy or fast) choice to implement plain text processing (which you need to make the text sortable) in C++. Bash is a much better fit for that purpose. You may still get a C++-like performance thanks to sort, not to mention the scalability of sort (and its capability to sort inputs larger than RAM).

    1. Make the date sortable.
    2. Sort the lines.
    3. Translate the date back to the original format.
    #!/bin/bash
    # Reads lines of the form "Mon D HH:MM:SS ..." from standard input and
    # writes them to standard output sorted by month, day and time.
    set -euo pipefail
    
    # Month name -> month number (associative, read-only, integer values).
    declare -Ari months=([Jan]=1 [Feb]=2 [Mar]=3 [Apr]=4 [May]=5 [Jun]=6
                         [Jul]=7 [Aug]=8 [Sep]=9 [Oct]=10 [Nov]=11 [Dec]=12)
    declare -a rmonths  # month numbers to month names
    for month in "${!months[@]}"; do rmonths[months["$month"]]="$month"; done
    declare -ar rmonths
    
    # Make the date sortable: zero-padded numeric month and day, so that a
    # plain textual sort orders the lines chronologically.
    while read -r month day rest; do
      # The format must end in \n so that every record stays on its own line.
      printf '%02d %02d %s\n' "${months["$month"]}" "$day" "$rest"
    done |
    
    # Sort the lines.
    sort |
    
    # Translate the date back to the original format.
    while read -r month day rest; do
      # 10# forces base 10 so that zero-padded values such as "08" are not
      # interpreted as (invalid) octal numbers.
      printf '%s %d %s\n' "${rmonths[$((10#"$month"))]}" "$((10#"$day"))" "$rest"
    done
    
    Login or Signup to reply.
  2. You can use std::sort to sort all elements in your std::vector<std::string>.

    However, by default, std::sort will call std::string::operator< to determine whether one object is smaller than another. This function will only perform a lexicographical comparison of both strings. This is not what you want. You want the strings to be sorted by the date they contain. In order to prevent std::sort from using std::string::operator<, you will have to write your own comparison function and tell std::sort to use that function instead.

    Here is an example:

    #include <iostream>
    #include <fstream>
    #include <vector>
    #include <map>
    #include <string>
    #include <sstream>
    #include <algorithm>
    
    //Calendar fields taken from the timestamp of one log line (no year).
    //Every member defaults to 0, so a default-constructed object never
    //holds indeterminate values. The member order is unchanged, so
    //aggregate initialization keeps working.
    struct custom_time
    {
        int day = 0;
        int month = 0;
        int hour = 0;
        int minute = 0;
        int second = 0;
    };
    
    //Parses the leading "Mon D HH:MM:SS" timestamp of a log line into t.
    //
    //line: the text to parse; whatever follows the seconds field is ignored
    //t:    receives the parsed day, month, hour, minute and second
    //
    //Throws std::runtime_error if the timestamp does not have the expected
    //layout or if the month is not one of the abbreviations "Jan" .. "Dec".
    //t may already be partially overwritten when an exception is thrown.
    void extract_time_from_line( const std::string& line, custom_time& t )
    {
        static const std::map<std::string,int> months =
        {
            {"Jan",1},
            {"Feb",2},
            {"Mar",3},
            {"Apr",4},
            {"May",5},
            {"Jun",6},
            {"Jul",7},
            {"Aug",8},
            {"Sep",9},
            {"Oct",10},
            {"Nov",11},
            {"Dec",12}
        };

        std::istringstream ss{line};
        std::string month;
        char delim1 = '\0';
        char delim2 = '\0';

        //parse the line
        ss >> month >> t.day >> t.hour >> delim1 >> t.minute >> delim2 >> t.second;

        //verify that parsing was successful
        if ( !ss || delim1 != ':' || delim2 != ':' )
            throw std::runtime_error( "Error parsing line!" );

        //convert month to number
        const auto it = months.find( month );
        if ( it == months.end() )
            throw std::runtime_error( "Invalid month found!" );
        t.month = it->second;
    }
    
    //Ordering predicate for std::sort: returns true only if the timestamp
    //at the start of line a is strictly earlier than the one at the start
    //of line b. Identical timestamps compare as "not less" in both
    //directions; std::sort requires this (strict weak ordering), and
    //returning true for equal elements is undefined behavior.
    //
    //Any std::runtime_error thrown by extract_time_from_line propagates to
    //the caller.
    bool compare( const std::string& a, const std::string& b )
    {
        custom_time ta, tb;

        extract_time_from_line( a, ta );
        extract_time_from_line( b, tb );

        //the first field that differs decides the order
        if ( ta.month != tb.month )
            return ta.month < tb.month;
        if ( ta.day != tb.day )
            return ta.day < tb.day;
        if ( ta.hour != tb.hour )
            return ta.hour < tb.hour;
        if ( ta.minute != tb.minute )
            return ta.minute < tb.minute;

        //all earlier fields are equal; this is false for identical times
        return ta.second < tb.second;
    }
    
    //Reads "searches.txt", sorts its lines with compare and prints the
    //sorted lines to standard output.
    //
    //Returns 0 on success and 1 if a std::runtime_error was caught (file
    //could not be opened, or a line could not be parsed).
    int main() try
    {
        //open the file
        std::ifstream file( "searches.txt" );
        if ( !file )
            throw std::runtime_error( "Error opening file!" );

        std::vector<std::string> contents;
        std::string line;

        //read the entire file into the vector
        while ( std::getline( file, line ) )
        {
            contents.push_back( line );
        }

        //sort the vector
        std::sort( contents.begin(), contents.end(), compare );

        //print the sorted content of the vector, one entry per line
        //(bound by const reference to avoid copying every string)
        for ( const auto& file_line : contents )
            std::cout << file_line << '\n';

        return 0;
    }
    catch ( const std::runtime_error& e )
    {
        std::cerr << "Exception encountered: " << e.what() << '\n';

        //report the failure to the caller instead of exiting with 0
        return 1;
    }
    

    For the input stated in the question, this program has the following output:

    Jun 2 18:37:41 108.57.27.85:5491 Failed password for illegal user guest
    Jun 16 22:09:01 84.88.502.53:6452 Failed password for illegal user test
    Jun 20 13:39:21 118.15.416.57:4486 Failed password for illegal user guest
    Jul 12 20:17:38 390.18.339.91:6610 Failed password for root
    Jul 16 03:38:22 990.87.715.21:6558 Failed password for admir/
    Aug 4 03:18:56 960.96.3.29:5268 Failed password for admin
    Aug 28 23:07:49 897.53.984.6:6710 Failed password for root
    Sep 14 10:01:55 694.78.591.36:5537 Failed password for illegal user guest
    Sep 24 17:22:12 311.49.840.89:4145 Failed password for illegal user guest
    Oct 1 07:22:46 450.25.888.72:5978 Illegal user
    Oct 9 10:32:24 423.2.230.77:6166 Failed password for illegal user guest
    Oct 26 00:53:10 505.13.173.18:4083 Failed password for illegal user guest
    

    However, parsing both lines every time std::sort calls the compare function is not very efficient. Therefore, it would be more efficient to parse every line only once, and to cache the parsing results:

    #include <iostream>
    #include <fstream>
    #include <vector>
    #include <map>
    #include <string>
    #include <sstream>
    #include <algorithm>
    
    //Calendar fields taken from the timestamp of one log line (no year).
    //Every member defaults to 0, so a default-constructed object never
    //holds indeterminate values. The member order is unchanged, so
    //aggregate initialization keeps working.
    struct custom_time
    {
        int day = 0;
        int month = 0;
        int hour = 0;
        int minute = 0;
        int second = 0;
    };
    
    //Parses the leading "Mon D HH:MM:SS" timestamp of a log line into t.
    //
    //line: the text to parse; whatever follows the seconds field is ignored
    //t:    receives the parsed day, month, hour, minute and second
    //
    //Throws std::runtime_error if the timestamp does not have the expected
    //layout or if the month is not one of the abbreviations "Jan" .. "Dec".
    //t may already be partially overwritten when an exception is thrown.
    void extract_time_from_line( const std::string& line, custom_time& t )
    {
        static const std::map<std::string,int> months =
        {
            {"Jan",1},
            {"Feb",2},
            {"Mar",3},
            {"Apr",4},
            {"May",5},
            {"Jun",6},
            {"Jul",7},
            {"Aug",8},
            {"Sep",9},
            {"Oct",10},
            {"Nov",11},
            {"Dec",12}
        };

        std::istringstream ss{line};
        std::string month;
        char delim1 = '\0';
        char delim2 = '\0';

        //parse the line
        ss >> month >> t.day >> t.hour >> delim1 >> t.minute >> delim2 >> t.second;

        //verify that parsing was successful
        if ( !ss || delim1 != ':' || delim2 != ':' )
            throw std::runtime_error( "Error parsing line!" );

        //convert month to number
        const auto it = months.find( month );
        if ( it == months.end() )
            throw std::runtime_error( "Invalid month found!" );
        t.month = it->second;
    }
    
    //Ordering predicate: returns true only if a is strictly earlier than b,
    //comparing month, day, hour, minute and second in that order.
    //Identical times compare as "not less" in both directions; std::sort
    //requires this (strict weak ordering), and returning true for equal
    //elements is undefined behavior.
    bool compare( const custom_time& a, const custom_time& b )
    {
        //the first field that differs decides the order
        if ( a.month != b.month )
            return a.month < b.month;
        if ( a.day != b.day )
            return a.day < b.day;
        if ( a.hour != b.hour )
            return a.hour < b.hour;
        if ( a.minute != b.minute )
            return a.minute < b.minute;

        //all earlier fields are equal; this is false for identical times
        return a.second < b.second;
    }
    
    //Reads "searches.txt", parses the timestamp of every line exactly once,
    //sorts the lines by that cached timestamp and prints them to standard
    //output.
    //
    //Returns 0 on success and 1 if a std::runtime_error was caught (file
    //could not be opened, or a line could not be parsed).
    int main() try
    {
        std::string line;

        //open the file
        std::ifstream file( "searches.txt" );
        if ( !file )
            throw std::runtime_error( "Error opening file!" );

        //one line of the file together with its already-parsed timestamp
        struct string_entry
        {
            custom_time time;
            std::string str;
        };

        std::vector<string_entry> contents;

        //read and parse the entire file into the vector
        while ( std::getline( file, line ) )
        {
            auto& new_elem = contents.emplace_back();
            extract_time_from_line( line, new_elem.time );

            //line is moved from here; the next std::getline call assigns
            //it a fresh value before it is read again
            new_elem.str = std::move( line );
        }

        //sort the vector by the cached timestamps
        std::sort(
            contents.begin(), contents.end(),
            [](const string_entry& a, const string_entry& b)
            {
                return compare( a.time, b.time );
            }
        );

        //print the sorted content of the vector, one entry per line
        for ( const auto& elem : contents )
            std::cout << elem.str << '\n';

        return 0;
    }
    catch ( const std::runtime_error& e )
    {
        std::cerr << "Exception encountered: " << e.what() << '\n';

        //report the failure to the caller instead of exiting with 0
        return 1;
    }
    

    This second program has the same output as the first program.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search