commit 5cc486395359451ec841cbca76a18b35cffb9e85 Author: Jon Foster Date: Thu May 13 17:53:18 2021 -0700 Initial import This is the first attempt to track who's making DNS queries. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ec9d440 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# artifacts +*.o +/iptraffic +/log diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..06d0c7b --- /dev/null +++ b/Makefile @@ -0,0 +1,6 @@ +.PHONY: run +run: iptraffic + ./iptraffic 2> log # 2>&1 | head -n 20 + +iptraffic: iptraffic.cpp + j++ -o $@ $@.cpp diff --git a/iptraffic.cpp b/iptraffic.cpp new file mode 100644 index 0000000..43fb132 --- /dev/null +++ b/iptraffic.cpp @@ -0,0 +1,415 @@ +////////////////////////////////////////////////////////////////////// +// IP traffic analyzer +// Written by Jonathan A. Foster +// Started April 23rd, 2021 +// +// The idea is to analyze iptables LOG entries in combination with +// DNSmasq's query log entries and combine them to list the hosts +// that were accessed. The main reason for not just inspecting HTTP +// packets through a netfilter socket is that HTTPS hides the +// "host" field. So I'm deducing based on DNS query timing. +////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////// +// Additional Router setup: +// +// ipset -N evilhosts iphash +// ipset -N evilnets nethash +////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////// +// Obvious ignores: +// +// 10.10.10.1 -> 134.215.160.1 ICMP[8] +// +////////////////////////////////////////////////////////////////////// +// TODO: wildcard for name matching +// TODO: map names according to time and host. 
time is probably automatic +#include +#include +#include +#include +#include +#include +#include +using namespace std; + + + +////////////////////////////////////////////////////////////////////// +// Splits: a util class to divide a line into space-separated pieces +////////////////////////////////////////////////////////////////////// +// TODO: implement begin() + end() to make "for( : )" work + +struct Splits { + + /// CONFIG /// + + enum { FieldMax=256, LineMax=1024 }; + + /// properties /// + + char line[LineMax]; // Line buffer + int len; // Length of line (after split()) + char sep; // Separator character. + bool combine; // Treat multiple consecutive seps as one (combine) + char *fields[FieldMax]; // pointers to fields in line + int count; // How many fields there were + + // construct + Splits(): count(0), len(0), sep(' '), combine(true) { line[LineMax-1] = 0; } + + // Convert field[] to string + inline string operator[](int i) const { string s(fields[i]); return s; } + + // split line. Returns count. 
+ int split() { + len = count = 0; + if(!*line) return count; + fields[0] = line; + while(len=LineMax) throw + runtime_error("Splits::split: end of buffer null missing!"); + fields[count] = line+len; + } else + throw runtime_error("Splits::split: Too many fields in the line"); + } else + len++; + } + return count++; + } +}; + +// istream >> operator: getline() + .split() +istream &operator>>(istream &in, Splits &sp) { + if(in.getline(sp.line, sp.LineMax-1)) sp.split(); + return in; +} + + + +////////////////////////////////////////////////////////////////////// +// TSV version of Splits +////////////////////////////////////////////////////////////////////// + +struct TSV: public Splits { + TSV() { sep='\t'; combine=false; } +}; + + + + +////////////////////////////////////////////////////////////////////// +// Subclass to match a list of prefixes against a string +// +// This is not a subclass per-se since I don't know how to expose the +// inherited { } list based constructor and I just want to knock this +// out. So this is Q&D: typedef + operator. +////////////////////////////////////////////////////////////////////// + +typedef vector PreMatch; +bool operator==(const PreMatch &list, const string &s) { + for(auto p: list) if(s.substr(0, p.size())==p) return true; + return false; +} +inline bool operator!=(const PreMatch &list, const string &s) { return !(list==s); } + + + +////////////////////////////////////////////////////////////////////// +// Connection between "us" and "them" +////////////////////////////////////////////////////////////////////// +typedef unsigned short word; +struct Conn { + string us; // address on our side + word us_port; // the port on our side + string them; // address on their side + word them_port; // the port on their side + string name; // name of the address + string protocol; // protocol used to communicate + bool in; // whether this was an inward bound connection. 
+ + Conn(): in(false) {} + Conn &clear() { us = them = name = protocol = ""; in=false; us_port = them_port = 0; } + + // swap polarity of record + Conn &swap() { + string s; + int x; + s = us; + us = them; + them =s; + x = us_port; + us_port = them_port; + them_port = x; + in=!in; + return *this; + } + + // scan & copy data from log record in + Conn &operator=(const Splits &sp) { + int x; + clear(); + for(x=0; xgtr.name) return 1; + } + if(protocolgtr.protocol) return 1; + if(ingtr.in) return 1; + return 0; + } + + inline bool operator<(const Conn >r) const { return cmp(gtr) <0; } + inline bool operator<=(const Conn >r) const { return cmp(gtr)<=0; } + inline bool operator>(const Conn >r) const { return cmp(gtr) >0; } + inline bool operator>=(const Conn >r) const { return cmp(gtr)>=0; } + inline bool operator==(const Conn >r) const { return cmp(gtr)==0; } + inline bool operator!=(const Conn >r) const { return cmp(gtr)!=0; } + +}; + +// A text output of this record +ostream &operator<<(ostream &out, const Conn &c) { + out << c.us + << ( c.in ? " <- " : " -> " ) + << c.them + << " " << c.protocol + << "[" << ( c.in ? 
c.us_port : c.them_port ) << "] " + << c.name; + return out; +} + +// Copy data from TSV in +const TSV &operator>>(const TSV &tsv, Conn &conn) { + if(tsv.count<7) throw runtime_error("Conn=TSV: too few columns"); + conn.clear(); + conn.us = tsv[0]; + conn.us_port = atoi(tsv.fields[1]); + conn.them = tsv[2]; + conn.them_port = atoi(tsv.fields[3]); + conn.name = tsv[4]; + conn.protocol = tsv[5]; + conn.in = tsv[6]=="1"; + return tsv; +} + + + +////////////////////////////////////////////////////////////////////// +// List of connections +////////////////////////////////////////////////////////////////////// + +struct ConnList: public vector { + int find(Conn &needle) { + int r; + for(r=0; r=seq.size()) p=0; return seq[p++]; } +}; +ostream &operator<<(ostream &o, LiveBug &bug) { + return o << bug.pre << bug.next(); +} + + + +////////////////////////////////////////////////////////////////////// +// Roll through file +////////////////////////////////////////////////////////////////////// +//#define DEBUG + +typedef map NameVal; + +const PreMatch us = { "10.10.10.", "192.168.255.", "2001:470:a:169:" }; +const PreMatch dns_ignore = { "v=spf1", "https:" }; +const PreMatch dns_del = { "NODATA-", "NXDOMAIN-" }; +#define PATH "/srv/backups/iptraffic" +ifstream log(PATH "/test.log"); +ofstream out(PATH "/processed.log"); +Splits ln; +int lnno = 0, ict = 0; +LiveBug bug; +NameVal rdns, queries; +NameVal::iterator nvp; +string name, address, s; +Conn conn; +bool match; +ConnList ignores; + + + +void dlog(const string msg) { + cerr << "\r" << lnno << ": " << msg << endl; +} + + + +int main(int argc, char **argv) { + + /// Read in ignore list /// + + { + TSV tsv; + ifstream in(PATH "/ignores.lst"); + while(in >> tsv) { + if(tsv.count>6) { + tsv >> conn; + ignores.push_back(conn); + } + } + } + + /// parse log file /// + + while((log >> ln)) { + lnno++; + cout << bug << " " << lnno << flush; + + /// DNS query result /// + + // TODO: need to get more specific on tying us + them + 
time to DNS + if(ln.count>8 && strncmp(ln.fields[4], "dnsmasq[", 8)==0) { + + /// Query send /// + + if(strncmp(ln.fields[5], "query[", 6)==0) { + s=ln[5].substr(6, ln[5].size()-7); + if(s!="A" && s!="AAAA") continue; // we're only concerned with addresses. + name = ln[6]; + address = ln[8]; + dlog("Query["+s+"] '"+name+"' for "+address); + name+=':'+s; + if(queries.find(name)==queries.end()) + queries[name]=address; + else + dlog("WARN: Query already exists!"); + + /// Query reply /// + + } else if(ln[5]=="reply") { + name = ln[6]; + address = ln[8]; + // Hmm... is this reply an address? + if(dns_ignore==address) continue; // nope + if(dns_del==address) { + // "no exist" reply so just drop them. + if(*(address.end()-1)=='4') name+=":A"; + else if(*(address.end()-1)=='6') name+=":AAAA"; + else continue; + dlog("drop query '"+name+"'"); + if((nvp=queries.find(name))!=queries.end()) queries.erase(nvp); + continue; + } + // IPv6 or v4 query? + if(address.find(':')==name.npos) + s=name+":AAAA"; + else + s=name+":A"; + // now make source dest couplet + if((nvp=queries.find(s))!=queries.end()) { + address+=':'+nvp->second; + //queries.erase(nvp); // remove from active query list + if((nvp=rdns.find(address))!=rdns.end()) { + if(nvp->second==name) continue; +#ifdef DEBUG + dlog("WARN: DNS address overlap "+address+": "+rdnsp->second+" : "+name); +#endif + } + rdns[address] = name; + dlog("Added "+address+" = "+name); +#ifdef DEBUG + cout '\r' << lnno << ": " << name << endl; +#endif + continue; + } + dlog("WARN: reply '"+name+"' skipped due to lack of matching query"); + continue; + } + } + + /// process connections /// + + if(ln.count>5 + && ln[4]=="kernel:" + && ln[5]=="ACCEPT" + ) { + conn = ln; + if(us!=conn.us) conn.swap(); + if((nvp=rdns.find(conn.them+':'+conn.us))!=rdns.end()) + conn.name = nvp->second; + if(ignores.find(conn)<0) + out << conn << "\n"; + else + ict++; + } + } + cout << "\nIgnored: " << ict << endl; + +#ifdef DEBUG + cout << "\n\n" << "Total rDNS: 
" << rdns.size() << "\n"; +#endif + return 0; +}