|
//////////////////////////////////////////////////////////////////////
// IP traffic analyzer
// Written by Jonathan A. Foster <ChipMaster@YeOlPiShack.net>
// Started April 23rd, 2021
//
// The idea is to analyze iptables LOG entries in combination with
// DNSmasq's query log entries and combine them to list the hosts
// that were accessed. The main reason for not just inspecting HTTP
// packets through a netfilter socket is that HTTPS hides the
// "host" field. So I'm deducing the host based on DNS query timing.
//////////////////////////////////////////////////////////////////////
-
//////////////////////////////////////////////////////////////////////
// Additional router setup:
//
// ipset -N evilhosts iphash
// ipset -N evilnets nethash
//////////////////////////////////////////////////////////////////////
-
- //////////////////////////////////////////////////////////////////////
- // Obvious ignores:
- //
- // 10.10.10.1 -> 134.215.160.1 ICMP[8]
- //
- //////////////////////////////////////////////////////////////////////
- // TODO: wildcard for name matching
- // TODO: map names according to time and host. time is probably automatic
-
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;
-
-
-
//////////////////////////////////////////////////////////////////////
// Splits: a util class to divide a line into separator-delimited pieces
//////////////////////////////////////////////////////////////////////
// TODO: implement begin() + end() to make "for( : )" work
-
// Splits one text line, in place, into separator-delimited fields.
//
// Usage: put a NUL-terminated string in `line`, call split(), then read
// `count` and `fields[]` / operator[]. split() overwrites the separators in
// `line` with NULs, so the field pointers are only meaningful until `line`
// is changed again.
struct Splits {

    /// CONFIG ///

    enum { FieldMax=256, LineMax=1024 };

    /// properties ///

    char  line[LineMax];     // Line buffer
    int   len;               // Length of line (after split())
    char  sep;               // Separator character.
    bool  combine;           // Treat multiple consecutive seps as one (combine)
    char *fields[FieldMax];  // Pointers to fields in line
    int   count;             // How many fields there were

    // Construct an empty, space separated splitter that merges runs of
    // separators. Initializers are listed in declaration order, which is the
    // order they actually run in.
    Splits(): len(0), sep(' '), combine(true), count(0) {
        line[0] = 0;           // split() on a fresh object sees an empty line
        line[LineMax-1] = 0;   // the buffer is always terminated
    }

    // Convert fields[i] to a string.
    // Throws std::out_of_range when i is not in [0, count) rather than
    // dereferencing a pointer that split() never set.
    inline std::string operator[](int i) const {
        if(i<0 || i>=count)
            throw std::out_of_range("Splits::operator[]: no such field");
        return std::string(fields[i]);
    }

    // Split `line` on `sep`. Returns the number of fields, which is also
    // left in `count`; an empty line yields 0.
    // Throws std::runtime_error when the line has more than FieldMax fields
    // or when the buffer is not NUL terminated.
    int split() {
        len = count = 0;
        if(!*line) return count;
        fields[0] = line;
        while(len<LineMax && line[len]) {
            if(line[len]!=sep) {
                len++;
                continue;
            }
            line[len++] = 0;                       // terminate the current field
            if(combine)                            // swallow the rest of the run
                while(len<LineMax && line[len]==sep) len++;
            if(++count>=FieldMax)
                throw std::runtime_error("Splits::split: Too many fields in the line");
            // this shouldn't happen
            if(len>=LineMax)
                throw std::runtime_error("Splits::split: end of buffer null missing!");
            fields[count] = line+len;
        }
        // `count` was the index of the last field; turn it into the number
        // of fields and return that same number.
        return ++count;
    }
};
-
- // istream >> operator: getline() + .split()
- istream &operator>>(istream &in, Splits &sp) {
- if(in.getline(sp.line, sp.LineMax-1)) sp.split();
- return in;
- }
-
-
-
- //////////////////////////////////////////////////////////////////////
- // TSV version of Splits
- //////////////////////////////////////////////////////////////////////
-
- struct TSV: public Splits {
- TSV() { sep='\t'; combine=false; }
- };
-
-
-
-
- //////////////////////////////////////////////////////////////////////
- // Subclass to match a list of prefixes against a string
- //
- // This is not a subclass per-se since I don't know how to expose the
- // inherited { } list based constructor and I just want to knock this
- // out. So this is Q&D: typedef + operator.
- //////////////////////////////////////////////////////////////////////
-
// A list of string prefixes.
typedef std::vector<std::string> PreMatch;

// True when `s` starts with at least one of the prefixes in `list`.
// An empty list never matches; an empty prefix matches every string.
bool operator==(const PreMatch &list, const std::string &s) {
    for(const std::string &prefix: list)
        // compare() clamps the range to s.size(), so a string shorter than
        // the prefix simply compares unequal; no temporary substring is built.
        if(s.compare(0, prefix.size(), prefix)==0) return true;
    return false;
}

// True when `s` starts with none of the prefixes in `list`.
inline bool operator!=(const PreMatch &list, const std::string &s) { return !(list==s); }
-
-
-
- //////////////////////////////////////////////////////////////////////
- // Connection between "us" and "them"
- //////////////////////////////////////////////////////////////////////
- typedef unsigned short word;
- struct Conn {
- string us; // address on our side
- word us_port; // the port on our side
- string them; // address on their side
- word them_port; // the port on their side
- string name; // name of the address
- string protocol; // protocol used to communicate
- bool in; // whether this was an inward bound connection.
-
- Conn(): in(false) {}
- Conn &clear() { us = them = name = protocol = ""; in=false; us_port = them_port = 0; }
-
- // swap polarity of record
- Conn &swap() {
- string s;
- int x;
- s = us;
- us = them;
- them =s;
- x = us_port;
- us_port = them_port;
- them_port = x;
- in=!in;
- return *this;
- }
-
- // scan & copy data from log record in
- Conn &operator=(const Splits &sp) {
- int x;
- clear();
- for(x=0; x<sp.count; x++) {
- if(!strncmp(sp.fields[x], "SRC=", 4)) {
- us = sp.fields[x]+4;
- continue;
- }
- if(!strncmp(sp.fields[x], "DST=", 4)) {
- them = sp.fields[x]+4;
- continue;
- }
- if(!strncmp(sp.fields[x], "SPT=", 4)) {
- us_port = atoi(sp.fields[x]+4);
- continue;
- }
- if(!strncmp(sp.fields[x], "DPT=", 4)) {
- them_port = atoi(sp.fields[x]+4);
- continue;
- }
- if(!strncmp(sp.fields[x], "TYPE=", 5) && protocol=="ICMP") {
- us_port = them_port = atoi(sp.fields[x]+5);
- continue;
- }
- if(!strncmp(sp.fields[x], "PROTO=", 6))
- protocol = sp.fields[x]+6;
- }
- }
-
- // TODO: does < > have any actual meaning in this context?
- int cmp(const Conn >r) const {
- if(us<gtr.us) return -1;
- if(us>gtr.us) return 1;
- // TODO: auto-wildcard port based on in?
- if(us_port && gtr.us_port) { // 0 = no comparison wildcard
- if(us_port<gtr.us_port) return -1;
- if(us_port>gtr.us_port) return 1;
- }
- if(them<gtr.them) return -1;
- if(them>gtr.them) return 1;
- if(them_port && gtr.them_port) { // 0 = no comparison wildcard
- if(them_port<gtr.them_port) return -1;
- if(them_port>gtr.them_port) return 1;
- }
- // TODO: do we want to consider the name?
- if(name!="") {
- if(name<gtr.name) return -1;
- if(name>gtr.name) return 1;
- }
- if(protocol<gtr.protocol) return -1;
- if(protocol>gtr.protocol) return 1;
- if(in<gtr.in) return -1;
- if(in>gtr.in) return 1;
- return 0;
- }
-
- inline bool operator<(const Conn >r) const { return cmp(gtr) <0; }
- inline bool operator<=(const Conn >r) const { return cmp(gtr)<=0; }
- inline bool operator>(const Conn >r) const { return cmp(gtr) >0; }
- inline bool operator>=(const Conn >r) const { return cmp(gtr)>=0; }
- inline bool operator==(const Conn >r) const { return cmp(gtr)==0; }
- inline bool operator!=(const Conn >r) const { return cmp(gtr)!=0; }
-
- };
-
- // A text output of this record
- ostream &operator<<(ostream &out, const Conn &c) {
- out << c.us
- << ( c.in ? " <- " : " -> " )
- << c.them
- << " " << c.protocol
- << "[" << ( c.in ? c.us_port : c.them_port ) << "] "
- << c.name;
- return out;
- }
-
- // Copy data from TSV in
- const TSV &operator>>(const TSV &tsv, Conn &conn) {
- if(tsv.count<7) throw runtime_error("Conn=TSV: too few columns");
- conn.clear();
- conn.us = tsv[0];
- conn.us_port = atoi(tsv.fields[1]);
- conn.them = tsv[2];
- conn.them_port = atoi(tsv.fields[3]);
- conn.name = tsv[4];
- conn.protocol = tsv[5];
- conn.in = tsv[6]=="1";
- return tsv;
- }
-
-
-
- //////////////////////////////////////////////////////////////////////
- // List of connections
- //////////////////////////////////////////////////////////////////////
-
- struct ConnList: public vector<Conn> {
- int find(Conn &needle) {
- int r;
- for(r=0; r<size(); r++) if((*this)[r]==needle) return r;
- return -1;
- }
- };
-
-
-
- //////////////////////////////////////////////////////////////////////
- // Busy indicator aka. "Live Bug"
- //////////////////////////////////////////////////////////////////////
-
// Spinner state: cycles through the characters of `seq`, one per next().
struct LiveBug {
    std::string seq = "-\\|/";  // characters to cycle through
    char pre = '\r';            // character written before each spinner char
    int p;                      // index of the next character in seq

    LiveBug(): p(0) {}

    // Return the current spinner character and advance, wrapping to the
    // start of seq once the end is reached. An out-of-range p (including a
    // negative one) restarts the cycle.
    inline char next() {
        if(p<0 || static_cast<std::string::size_type>(p)>=seq.size()) p = 0;
        return seq[p++];
    }
};
- ostream &operator<<(ostream &o, LiveBug &bug) {
- return o << bug.pre << bug.next();
- }
-
-
-
- //////////////////////////////////////////////////////////////////////
- // Roll through file
- //////////////////////////////////////////////////////////////////////
- //#define DEBUG
-
- typedef map<string,string> NameVal;
-
- const PreMatch us = { "10.10.10.", "192.168.255.", "2001:470:a:169:" };
- const PreMatch dns_ignore = { "v=spf1", "https:" };
- const PreMatch dns_del = { "NODATA-", "NXDOMAIN-" };
- #define PATH "/srv/backups/iptraffic"
- ifstream log(PATH "/test.log");
- ofstream out(PATH "/processed.log");
- Splits ln;
- int lnno = 0, ict = 0;
- LiveBug bug;
- NameVal rdns, queries;
- NameVal::iterator nvp;
- string name, address, s;
- Conn conn;
- bool match;
- ConnList ignores;
-
-
-
- void dlog(const string msg) {
- cerr << "\r" << lnno << ": " << msg << endl;
- }
-
-
-
- int main(int argc, char **argv) {
-
- /// Read in ignore list ///
-
- {
- TSV tsv;
- ifstream in(PATH "/ignores.lst");
- while(in >> tsv) {
- if(tsv.count>6) {
- tsv >> conn;
- ignores.push_back(conn);
- }
- }
- }
-
- /// parse log file ///
-
- while((log >> ln)) {
- lnno++;
- cout << bug << " " << lnno << flush;
-
- /// DNS query result ///
-
- // TODO: need to get more specific on tying us + them + time to DNS
- if(ln.count>8 && strncmp(ln.fields[4], "dnsmasq[", 8)==0) {
-
- /// Query send ///
-
- if(strncmp(ln.fields[5], "query[", 6)==0) {
- s=ln[5].substr(6, ln[5].size()-7);
- if(s!="A" && s!="AAAA") continue; // we're only concerned with addresses.
- name = ln[6];
- address = ln[8];
- dlog("Query["+s+"] '"+name+"' for "+address);
- name+=':'+s;
- if(queries.find(name)==queries.end())
- queries[name]=address;
- else
- dlog("WARN: Query already exists!");
-
- /// Query reply ///
-
- } else if(ln[5]=="reply") {
- name = ln[6];
- address = ln[8];
- // Hmm... is this reply an address?
- if(dns_ignore==address) continue; // nope
- if(dns_del==address) {
- // "no exist" reply so just drop them.
- if(*(address.end()-1)=='4') name+=":A";
- else if(*(address.end()-1)=='6') name+=":AAAA";
- else continue;
- dlog("drop query '"+name+"'");
- if((nvp=queries.find(name))!=queries.end()) queries.erase(nvp);
- continue;
- }
- // IPv6 or v4 query?
- if(address.find(':')==name.npos)
- s=name+":AAAA";
- else
- s=name+":A";
- // now make source dest couplet
- if((nvp=queries.find(s))!=queries.end()) {
- address+=':'+nvp->second;
- //queries.erase(nvp); // remove from active query list
- if((nvp=rdns.find(address))!=rdns.end()) {
- if(nvp->second==name) continue;
- #ifdef DEBUG
- dlog("WARN: DNS address overlap "+address+": "+rdnsp->second+" : "+name);
- #endif
- }
- rdns[address] = name;
- dlog("Added "+address+" = "+name);
- #ifdef DEBUG
- cout '\r' << lnno << ": " << name << endl;
- #endif
- continue;
- }
- dlog("WARN: reply '"+name+"' skipped due to lack of matching query");
- continue;
- }
- }
-
- /// process connections ///
-
- if(ln.count>5
- && ln[4]=="kernel:"
- && ln[5]=="ACCEPT"
- ) {
- conn = ln;
- if(us!=conn.us) conn.swap();
- if((nvp=rdns.find(conn.them+':'+conn.us))!=rdns.end())
- conn.name = nvp->second;
- if(ignores.find(conn)<0)
- out << conn << "\n";
- else
- ict++;
- }
- }
- cout << "\nIgnored: " << ict << endl;
-
- #ifdef DEBUG
- cout << "\n\n" << "Total rDNS: " << rdns.size() << "\n";
- #endif
- return 0;
- }
|