Initial import

This is the first attempt to track who's making DNS queries.
2 years ago · 5cc4863953
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,4 @@
 # artifacts
 *.o
 /iptraffic
 /log
--- a/Makefile
+++ b/Makefile
@@ -0,0 +1,6 @@
 # "run" builds the analyzer if needed and executes it, sending stderr
 # to ./log.
 .PHONY: run
 run: iptraffic
 	./iptraffic 2> log # 2>&1 | head -n 20

 # Build the analyzer from its single source file.
 iptraffic: iptraffic.cpp
 	g++ -o $@ $@.cpp
--- a/iptraffic.cpp
+++ b/iptraffic.cpp
@@ -0,0 +1,415 @@
 //////////////////////////////////////////////////////////////////////
 // IP traffic analyzer
 // Written by Jonathan A. Foster <ChipMaster@YeOlPiShack.net>
 // Started April 23rd, 2021
 //
 // The idea is to analyze iptables LOG entries in combination with
 // DNSmasq's query log entries and combine them to list the hosts
 // that were accessed. The main reasons for not just inspecting HTTP
 // packets through a netfilter socket is due to httpS hiding the
 // "host" field. So I'm deducing based on DNS query timing.
 //////////////////////////////////////////////////////////////////////

 //////////////////////////////////////////////////////////////////////
 // Additional router setup:
 //
 // ipset -N evilhosts iphash
 // ipset -N evilnets nethash
 //////////////////////////////////////////////////////////////////////

 //////////////////////////////////////////////////////////////////////
 // Obvious ignores:
 //
 // 10.10.10.1 -> 134.215.160.1 ICMP[8]
 //
 //////////////////////////////////////////////////////////////////////
 // TODO: wildcard for name matching
 // TODO: map names according to time and host. time is probably automatic

 #include <stdlib.h>
 #include <string.h>
 #include <fstream>
 #include <iostream>
 #include <limits>
 #include <map>
 #include <stdexcept>
 #include <string>
 #include <vector>
 using namespace std;



 //////////////////////////////////////////////////////////////////////
 // Splits: a utility class to divide a line into separator-delimited pieces
 //////////////////////////////////////////////////////////////////////
 // TODO: implement begin() + end() to make "for( : )" work

 struct Splits {

  /// CONFIG ///

  enum  { FieldMax=256, LineMax=1024 };

  /// properties ///

  char  line[LineMax];    // Line buffer; split() overwrites separators with 0
  int   len;              // Length of line (after split())
  char  sep;              // Separator character.
  bool  combine;          // Treat multiple consecutive seps as one (combine)
  char *fields[FieldMax]; // pointers to fields in line
  int   count;            // How many fields there were

  // Construct an empty, space separated splitter. The buffer starts out as
  // an empty string so split() is well defined before any line is loaded.
  Splits(): len(0), sep(' '), combine(true), count(0) {
    line[0] = 0;
    line[LineMax-1] = 0;
  }

  // Return a copy of field i as a string.
  // Throws out_of_range if i is not in [0, count).
  inline string operator[](int i) const {
    if(i<0 || i>=count)
      throw out_of_range("Splits::operator[]: field index out of range");
    return string(fields[i]);
  }

  // Split line[] in place: every separator is replaced by a terminating 0
  // and fields[] is pointed at the start of each piece.
  // Returns the number of fields found (the same value left in count);
  // an empty line yields 0.
  // Throws runtime_error if there are more than FieldMax fields or the
  // buffer is not null terminated.
  int split() {
    len = count = 0;
    if(!*line) return count;
    fields[0] = line;
    while(len<LineMax && line[len]) {
      if(line[len]==sep) {
        line[len++]=0;
        // swallow a run of separators when they should count as one
        if(combine) while(len<LineMax && line[len]==sep) len++;
        if(++count<FieldMax) {
          // this shouldn't happen
          if(len>=LineMax) throw
            runtime_error("Splits::split: end of buffer null missing!");
          fields[count] = line+len;
        } else
          throw runtime_error("Splits::split: Too many fields in the line");
      } else
        len++;
    }
    // count held the index of the last field; convert it to a field count
    // and return that count (not the stale index).
    return ++count;
  }
 };

 // istream >> operator: getline() + .split()
 // Read one line from `in` into sp.line and split it into fields.
 // A line too long for the buffer is truncated and the remainder of that
 // line is discarded, so one oversized record does not end the caller's
 // read loop. Returns `in`; the stream tests false once no line was read.
 istream &operator>>(istream &in, Splits &sp) {
  const streamsize limit = sp.LineMax-1;
  in.getline(sp.line, limit);
  // getline() sets failbit (without eofbit) when it fills the buffer before
  // reaching the end of the line; gcount() is then exactly limit-1.
  if(in.fail() && !in.bad() && !in.eof() && in.gcount()==limit-1) {
    in.clear();
    in.ignore(numeric_limits<streamsize>::max(), '\n');
  }
  if(in) sp.split();
  return in;
 }



 //////////////////////////////////////////////////////////////////////
 // TSV version of Splits
 //////////////////////////////////////////////////////////////////////

 struct TSV: public Splits {
  TSV() { sep='\t'; combine=false; }
 };




 //////////////////////////////////////////////////////////////////////
 // Subclass to match a list of prefixes against a string
 //
 // This is not a subclass per-se since I don't know how to expose the
 // inherited { } list based constructor and I just want to knock this
 // out. So this is Q&D: typedef + operator.
 //////////////////////////////////////////////////////////////////////

 // A list of string prefixes.
 typedef vector<string> PreMatch;

 // True if `s` begins with at least one of the prefixes in `list`.
 // An empty list never matches; an empty prefix matches every string.
 bool operator==(const PreMatch &list, const string &s) {
  // compare() checks the leading characters in place; no prefix or
  // substring copies are made.
  for(const auto &p: list) if(s.compare(0, p.size(), p)==0) return true;
  return false;
 }

 // True if `s` begins with none of the prefixes in `list`.
 inline bool operator!=(const PreMatch &list, const string &s) { return !(list==s); }



 //////////////////////////////////////////////////////////////////////
 // Connection between "us" and "them"
 //////////////////////////////////////////////////////////////////////
 typedef unsigned short word;

 // One logged connection, seen from "our" side of the link.
 struct Conn {
  string us;        // address on our side
  word   us_port;   // the port on our side
  string them;      // address on their side
  word   them_port; // the port on their side
  string name;      // name of the address
  string protocol;  // protocol used to communicate
  bool   in;        // whether this was an inward bound connection.

  // Start out as an empty, outbound record with both ports zeroed.
  Conn(): us_port(0), them_port(0), in(false) {}

  // Reset every member to its empty state. Returns *this.
  Conn &clear() {
    us = them = name = protocol = "";
    in = false;
    us_port = them_port = 0;
    return *this;
  }

  // swap polarity of record: exchange the two endpoints and flip `in`.
  Conn &swap() {
    us.swap(them);
    const word port = us_port;
    us_port = them_port;
    them_port = port;
    in = !in;
    return *this;
  }

  // scan & copy data from log record in
  //
  // Picks the SRC=, DST=, SPT=, DPT= and PROTO= fields out of a split log
  // line. For ICMP the TYPE= value is stored in both port members; this
  // relies on PROTO= appearing before TYPE= in the line. Anything not
  // present in the line is left cleared. Returns *this.
  Conn &operator=(const Splits &sp) {
    clear();
    for(int x=0; x<sp.count; x++) {
      const char *field = sp.fields[x];
      if(!strncmp(field, "SRC=", 4)) {
        us = field+4;
        continue;
      }
      if(!strncmp(field, "DST=", 4)) {
        them = field+4;
        continue;
      }
      if(!strncmp(field, "SPT=", 4)) {
        us_port = static_cast<word>(atoi(field+4));
        continue;
      }
      if(!strncmp(field, "DPT=", 4)) {
        them_port = static_cast<word>(atoi(field+4));
        continue;
      }
      if(!strncmp(field, "TYPE=", 5) && protocol=="ICMP") {
        us_port = them_port = static_cast<word>(atoi(field+5));
        continue;
      }
      if(!strncmp(field, "PROTO=", 6))
        protocol = field+6;
    }
    return *this;
  }

  // Three way compare: negative, zero or positive as *this sorts before,
  // equal to, or after gtr.
  //
  // A port of 0 on either side acts as a wildcard for that port, and the
  // name is only compared when THIS record has one. The comparison is
  // therefore asymmetric: a pattern record must be on the left hand side.
  // TODO: does < > have any actual meaning in this context?
  int cmp(const Conn &gtr) const {
    if(us<gtr.us) return -1;
    if(us>gtr.us) return  1;
    // TODO: auto-wildcard port based on in?
    if(us_port && gtr.us_port) { // 0 = no comparison wildcard
      if(us_port<gtr.us_port) return -1;
      if(us_port>gtr.us_port) return  1;
    }
    if(them<gtr.them) return -1;
    if(them>gtr.them) return  1;
    if(them_port && gtr.them_port) { // 0 = no comparison wildcard
      if(them_port<gtr.them_port) return -1;
      if(them_port>gtr.them_port) return  1;
    }
    // TODO:  do we want to consider the name?
    if(name!="") {
      if(name<gtr.name) return -1;
      if(name>gtr.name) return  1;
    }
    if(protocol<gtr.protocol) return -1;
    if(protocol>gtr.protocol) return  1;
    if(in<gtr.in) return -1;
    if(in>gtr.in) return  1;
    return 0;
  }

  // Relational operators, all expressed through cmp().
  inline bool  operator<(const Conn &gtr) const { return cmp(gtr) <0; }
  inline bool operator<=(const Conn &gtr) const { return cmp(gtr)<=0; }
  inline bool  operator>(const Conn &gtr) const { return cmp(gtr) >0; }
  inline bool operator>=(const Conn &gtr) const { return cmp(gtr)>=0; }
  inline bool operator==(const Conn &gtr) const { return cmp(gtr)==0; }
  inline bool operator!=(const Conn &gtr) const { return cmp(gtr)!=0; }

 };

 // A text output of this record
 ostream &operator<<(ostream &out, const Conn &c) {
  out << c.us
      << ( c.in ? " <- " : " -> " )
      << c.them
      << " " << c.protocol
      << "[" << ( c.in ? c.us_port : c.them_port ) << "] "
      << c.name;
  return out;
 }

 // Copy data from TSV in
 // Fill `conn` from a split TSV row. Columns, in order: us, us_port, them,
 // them_port, name, protocol, in ("1" means inbound).
 // Throws runtime_error when the row has fewer than seven columns.
 const TSV &operator>>(const TSV &tsv, Conn &conn) {
  enum { ColUs, ColUsPort, ColThem, ColThemPort, ColName, ColProtocol,
         ColIn, ColCount };
  if(tsv.count<ColCount) throw runtime_error("Conn=TSV: too few columns");

  conn.clear();
  conn.us        = tsv[ColUs];
  conn.them      = tsv[ColThem];
  conn.name      = tsv[ColName];
  conn.protocol  = tsv[ColProtocol];
  conn.us_port   = atoi(tsv.fields[ColUsPort]);
  conn.them_port = atoi(tsv.fields[ColThemPort]);
  conn.in        = (tsv[ColIn]=="1");
  return tsv;
 }



 //////////////////////////////////////////////////////////////////////
 // List of connections
 //////////////////////////////////////////////////////////////////////

 struct ConnList: public vector<Conn> {
  int find(Conn &needle) {
    int r;
    for(r=0; r<size(); r++) if((*this)[r]==needle) return r;
    return -1;
  }
 };



 //////////////////////////////////////////////////////////////////////
 // Busy indicator aka. "Live Bug"
 //////////////////////////////////////////////////////////////////////

 struct LiveBug {
  string seq = "-\\|/";   // animation frames, shown one per call
  char pre = '\r';        // written before each frame
  int p;                  // index of the next frame in seq
  LiveBug(): p(0) {}

  // Hand back the current frame and advance, wrapping at the end of seq.
  inline char next() {
    const bool pastEnd = static_cast<string::size_type>(p) >= seq.size();
    if(pastEnd) p = 0;
    const char frame = seq[p];
    ++p;
    return frame;
  }
 };

 // Emit the prefix character followed by the next animation frame.
 ostream &operator<<(ostream &o, LiveBug &bug) {
  o << bug.pre;
  o << bug.next();
  return o;
 }



 //////////////////////////////////////////////////////////////////////
 // Roll through file
 //////////////////////////////////////////////////////////////////////
 //#define DEBUG

 // String to string lookup table.
 typedef map<string,string> NameVal;

 // Address prefixes treated as "our" side; main() swaps a connection whose
 // `us` address matches none of them.
 const PreMatch us = { "10.10.10.", "192.168.255.", "2001:470:a:169:" };
 // DNS reply values starting with one of these are skipped.
 const PreMatch dns_ignore = { "v=spf1", "https:" };
 // DNS reply values starting with one of these remove the pending query.
 const PreMatch dns_del = { "NODATA-", "NXDOMAIN-" };
 // Directory holding ignores.lst, test.log and processed.log.
 #define PATH "/srv/backups/iptraffic"
 // Input log to analyze.
 // NOTE(review): a global named `log` could clash with ::log if <math.h>
 // is ever pulled in - confirm before adding includes.
 ifstream log(PATH "/test.log");
 // Output: one line per connection that is not on the ignore list.
 ofstream out(PATH "/processed.log");
 Splits ln;              // current input line, split on spaces
 int lnno = 0, ict = 0;  // lines read so far; connections ignored so far
 LiveBug bug;            // progress spinner written to cout
 // rdns:    "<reply address>:<value stored for the query>" -> queried name
 // queries: "<name>:<A|AAAA>" -> field 8 of the query line
 //          (presumably the requesting client address - confirm)
 NameVal rdns, queries;
 NameVal::iterator nvp;  // scratch iterator for lookups in the maps above
 string name, address, s; // scratch strings used while parsing a line
 Conn conn;              // connection record being built
 bool match;             // not referenced in the visible code
 ConnList ignores;       // connections loaded from ignores.lst



 // Write a diagnostic line to stderr, prefixed with the current input line
 // number. The leading '\r' returns to the start of the line first.
 // The message is taken by reference so it is not copied on each call.
 void dlog(const string &msg) {
  cerr << "\r" << lnno << ": " << msg << endl;
 }



 int main(int argc, char **argv) {

  /// Read in ignore list ///

  {
    TSV tsv;
    ifstream in(PATH "/ignores.lst");
    while(in >> tsv) {
      if(tsv.count>6) {
        tsv >> conn;
        ignores.push_back(conn);
      }
    }
  }

  /// parse log file ///

  while((log >> ln)) {
    lnno++;
    cout << bug << " " << lnno << flush;

    ///  DNS query result ///

    // TODO: need to get more specific on tying us + them + time to DNS
    if(ln.count>8 && strncmp(ln.fields[4], "dnsmasq[", 8)==0) {

      /// Query send ///

      if(strncmp(ln.fields[5], "query[", 6)==0) {
        s=ln[5].substr(6, ln[5].size()-7);
        if(s!="A" && s!="AAAA") continue; // we're only concerned with addresses.
        name = ln[6];
        address = ln[8];
        dlog("Query["+s+"] '"+name+"' for "+address);
        name+=':'+s;
        if(queries.find(name)==queries.end())
          queries[name]=address;
        else
          dlog("WARN: Query already exists!");

      /// Query reply ///

      } else if(ln[5]=="reply") {
        name = ln[6];
        address = ln[8];
        // Hmm... is this reply an address?
        if(dns_ignore==address) continue; // nope
        if(dns_del==address) {
          // "no exist" reply so just drop them.
               if(*(address.end()-1)=='4')  name+=":A";
          else if(*(address.end()-1)=='6')  name+=":AAAA";
          else                              continue;
          dlog("drop query '"+name+"'");
          if((nvp=queries.find(name))!=queries.end()) queries.erase(nvp);
          continue;
        }
        // IPv6 or v4 query?
        if(address.find(':')==name.npos)
          s=name+":AAAA";
        else
          s=name+":A";
        // now make source dest couplet
        if((nvp=queries.find(s))!=queries.end()) {
          address+=':'+nvp->second;
          //queries.erase(nvp); // remove from active query list
          if((nvp=rdns.find(address))!=rdns.end()) {
            if(nvp->second==name) continue;
 #ifdef DEBUG
            dlog("WARN: DNS address overlap "+address+": "+rdnsp->second+" : "+name);
 #endif
          }
          rdns[address] = name;
          dlog("Added "+address+" = "+name);
 #ifdef DEBUG
          cout '\r' << lnno << ": " << name << endl;
 #endif
          continue;
        }
        dlog("WARN: reply '"+name+"' skipped due to lack of matching query");
        continue;
      }
    }

    /// process connections ///

    if(ln.count>5
    && ln[4]=="kernel:"
    && ln[5]=="ACCEPT"
    ) {
      conn = ln;
      if(us!=conn.us) conn.swap();
      if((nvp=rdns.find(conn.them+':'+conn.us))!=rdns.end())
        conn.name = nvp->second;
      if(ignores.find(conn)<0)
        out << conn << "\n";
      else
        ict++;
    }
  }
  cout << "\nIgnored: " << ict << endl;

 #ifdef DEBUG
  cout << "\n\n" << "Total rDNS: " << rdns.size() << "\n";
 #endif
  return 0;
 }