1 // DNS server implementation.
19 // Default maximum number of TCP queries before we close the socket.
20 const maxTCPQueries = 128
22 // The maximum number of idle workers.
24 // This controls the maximum number of workers that are allowed to stay
25 // idle waiting for incoming requests before being torn down.
27 // If this limit is reached, the server will just keep spawning new
28 // workers (goroutines) for each incoming request. In this case, each
29 // worker will only be used for a single request.
30 const maxIdleWorkersCount = 10000
32 // The maximum length of time a worker may idle for before being destroyed.
33 const idleWorkerTimeout = 10 * time.Second
35 // aLongTimeAgo is a non-zero time, far in the past, used for
36 // immediate cancelation of network operations.
37 var aLongTimeAgo = time.Unix(1, 0)
39 // Handler is implemented by any value that implements ServeDNS.
40 type Handler interface {
41 ServeDNS(w ResponseWriter, r *Msg)
44 // The HandlerFunc type is an adapter to allow the use of
45 // ordinary functions as DNS handlers. If f is a function
46 // with the appropriate signature, HandlerFunc(f) is a
47 // Handler object that calls f.
48 type HandlerFunc func(ResponseWriter, *Msg)
50 // ServeDNS calls f(w, r).
51 func (f HandlerFunc) ServeDNS(w ResponseWriter, r *Msg) {
55 // A ResponseWriter interface is used by a DNS handler to
56 // construct a DNS response.
57 type ResponseWriter interface {
58 // LocalAddr returns the net.Addr of the server
60 // RemoteAddr returns the net.Addr of the client that sent the current request.
62 // WriteMsg writes a reply back to the client.
64 // Write writes a raw buffer back to the client.
65 Write([]byte) (int, error)
66 // Close closes the connection.
68 // TsigStatus returns the status of the Tsig.
70 // TsigTimersOnly sets the tsig timers only boolean.
72 // Hijack lets the caller take over the connection.
73 // After a call to Hijack(), the DNS package will not do anything with the connection.
77 // A ConnectionStater interface is used by a DNS Handler to access TLS connection state
79 type ConnectionStater interface {
80 ConnectionState() *tls.ConnectionState
83 type response struct {
85 closed bool // connection has been closed
86 hijacked bool // connection has been hijacked by handler
90 tsigSecret map[string]string // the TSIG secrets, keyed by TSIG record name
91 udp *net.UDPConn // i/o connection if UDP was used
92 tcp net.Conn // i/o connection if TCP was used
93 udpSession *SessionUDP // out-of-band data to get egress interface right
94 writer Writer // writer to output the raw DNS bits
95 wg *sync.WaitGroup // for graceful shutdown
98 // HandleFailed is a handler function that returns SERVFAIL for every request it gets.
99 func HandleFailed(w ResponseWriter, r *Msg) {
101 m.SetRcode(r, RcodeServerFailure)
102 // does not matter if this write fails
106 // ListenAndServe starts a server on the specified address and network and
107 // invokes handler for incoming queries.
108 func ListenAndServe(addr string, network string, handler Handler) error {
109 server := &Server{Addr: addr, Net: network, Handler: handler}
110 return server.ListenAndServe()
113 // ListenAndServeTLS acts like http.ListenAndServeTLS; for more information see
114 // http://golang.org/pkg/net/http/#ListenAndServeTLS
115 func ListenAndServeTLS(addr, certFile, keyFile string, handler Handler) error {
116 cert, err := tls.LoadX509KeyPair(certFile, keyFile)
121 config := tls.Config{
122 Certificates: []tls.Certificate{cert},
132 return server.ListenAndServe()
135 // ActivateAndServe activates a server with a listener from systemd;
136 // l and p should not both be non-nil.
137 // If both l and p are non-nil, only p will be used.
138 // It invokes handler for incoming queries.
139 func ActivateAndServe(l net.Listener, p net.PacketConn, handler Handler) error {
140 server := &Server{Listener: l, PacketConn: p, Handler: handler}
141 return server.ActivateAndServe()
144 // Writer writes raw DNS messages; each call to Write should send an entire message.
145 type Writer interface {
149 // Reader reads raw DNS messages; each call to ReadTCP or ReadUDP should return an entire message.
150 type Reader interface {
151 // ReadTCP reads a raw message from a TCP connection. Implementations may alter
152 // connection properties, for example the read-deadline.
153 ReadTCP(conn net.Conn, timeout time.Duration) ([]byte, error)
154 // ReadUDP reads a raw message from a UDP connection. Implementations may alter
155 // connection properties, for example the read-deadline.
156 ReadUDP(conn *net.UDPConn, timeout time.Duration) ([]byte, *SessionUDP, error)
159 // defaultReader is an adapter for the Server struct that implements the Reader interface
160 // using the readTCP and readUDP func of the embedded Server.
161 type defaultReader struct {
165 func (dr defaultReader) ReadTCP(conn net.Conn, timeout time.Duration) ([]byte, error) {
166 return dr.readTCP(conn, timeout)
169 func (dr defaultReader) ReadUDP(conn *net.UDPConn, timeout time.Duration) ([]byte, *SessionUDP, error) {
170 return dr.readUDP(conn, timeout)
173 // DecorateReader is a decorator hook for extending or supplanting the functionality of a Reader.
174 // Implementations should never return a nil Reader.
175 type DecorateReader func(Reader) Reader
177 // DecorateWriter is a decorator hook for extending or supplanting the functionality of a Writer.
178 // Implementations should never return a nil Writer.
179 type DecorateWriter func(Writer) Writer
181 // A Server defines parameters for running a DNS server.
183 // Address to listen on, ":dns" if empty.
185 // if "tcp" or "tcp-tls" (DNS over TLS) it will invoke a TCP listener, otherwise a UDP one
187 // TCP Listener to use, this is to aid in systemd's socket activation.
188 Listener net.Listener
189 // TLS connection configuration
190 TLSConfig *tls.Config
191 // UDP "Listener" to use, this is to aid in systemd's socket activation.
192 PacketConn net.PacketConn
193 // Handler to invoke, dns.DefaultServeMux if nil.
195 // Default buffer size to use to read incoming UDP messages. If not set
196 // it defaults to MinMsgSize (512 B).
198 // The read timeout for new connections (applied via net.Conn.SetReadDeadline), defaults to 2 * time.Second.
199 ReadTimeout time.Duration
200 // The write timeout for new connections, defaults to 2 * time.Second.
201 WriteTimeout time.Duration
202 // TCP idle timeout for multiple queries, if nil, defaults to 8 * time.Second (RFC 5966).
203 IdleTimeout func() time.Duration
204 // Secret(s) for Tsig map[<zonename>]<base64 secret>. The zonename must be in canonical form (lowercase, fqdn, see RFC 4034 Section 6.2).
205 TsigSecret map[string]string
206 // If NotifyStartedFunc is set it is called once the server has started listening.
207 NotifyStartedFunc func()
208 // DecorateReader is optional, allows customization of the process that reads raw DNS messages.
209 DecorateReader DecorateReader
210 // DecorateWriter is optional, allows customization of the process that writes raw DNS messages.
211 DecorateWriter DecorateWriter
212 // Maximum number of TCP queries before we close the socket. Default is maxTCPQueries (unlimited if -1).
214 // Whether to set the SO_REUSEPORT socket option, allowing multiple listeners to be bound to a single address.
215 // It is only supported on go1.11+ and when using ListenAndServe.
217 // MsgAcceptFunc will check the incoming message and will reject it early in the process.
218 // By default DefaultMsgAcceptFunc will be used.
219 MsgAcceptFunc MsgAcceptFunc
221 // UDP packet or TCP connection queue
229 shutdown chan struct{}
230 conns map[net.Conn]struct{}
232 // A pool for UDP message buffers.
236 func (srv *Server) isStarted() bool {
238 started := srv.started
243 func (srv *Server) worker(w *response) {
247 count := atomic.LoadInt32(&srv.workersCount)
248 if count > maxIdleWorkersCount {
251 if atomic.CompareAndSwapInt32(&srv.workersCount, count, count+1) {
256 defer atomic.AddInt32(&srv.workersCount, -1)
259 timeout := time.NewTimer(idleWorkerTimeout)
264 case w, ok := <-srv.queue:
275 timeout.Reset(idleWorkerTimeout)
280 func (srv *Server) spawnWorker(w *response) {
288 func makeUDPBuffer(size int) func() interface{} {
289 return func() interface{} {
290 return make([]byte, size)
294 func (srv *Server) init() {
295 srv.queue = make(chan *response)
297 srv.shutdown = make(chan struct{})
298 srv.conns = make(map[net.Conn]struct{})
300 if srv.UDPSize == 0 {
301 srv.UDPSize = MinMsgSize
303 if srv.MsgAcceptFunc == nil {
304 srv.MsgAcceptFunc = defaultMsgAcceptFunc
307 srv.udpPool.New = makeUDPBuffer(srv.UDPSize)
310 func unlockOnce(l sync.Locker) func() {
312 return func() { once.Do(l.Unlock) }
315 // ListenAndServe starts a nameserver on the configured address in *Server.
316 func (srv *Server) ListenAndServe() error {
317 unlock := unlockOnce(&srv.lock)
322 return &Error{err: "server already started"}
331 defer close(srv.queue)
334 case "tcp", "tcp4", "tcp6":
335 l, err := listenTCP(srv.Net, addr, srv.ReusePort)
342 return srv.serveTCP(l)
343 case "tcp-tls", "tcp4-tls", "tcp6-tls":
344 if srv.TLSConfig == nil || (len(srv.TLSConfig.Certificates) == 0 && srv.TLSConfig.GetCertificate == nil) {
345 return errors.New("dns: neither Certificates nor GetCertificate set in Config")
347 network := strings.TrimSuffix(srv.Net, "-tls")
348 l, err := listenTCP(network, addr, srv.ReusePort)
352 l = tls.NewListener(l, srv.TLSConfig)
356 return srv.serveTCP(l)
357 case "udp", "udp4", "udp6":
358 l, err := listenUDP(srv.Net, addr, srv.ReusePort)
362 u := l.(*net.UDPConn)
363 if e := setUDPSocketOptions(u); e != nil {
369 return srv.serveUDP(u)
371 return &Error{err: "bad network"}
374 // ActivateAndServe starts a nameserver with the PacketConn or Listener
375 // configured in *Server. Its main use is to start a server from systemd.
376 func (srv *Server) ActivateAndServe() error {
377 unlock := unlockOnce(&srv.lock)
382 return &Error{err: "server already started"}
386 defer close(srv.queue)
388 pConn := srv.PacketConn
391 // Check PacketConn interface's type is valid and value
393 if t, ok := pConn.(*net.UDPConn); ok && t != nil {
394 if e := setUDPSocketOptions(t); e != nil {
399 return srv.serveUDP(t)
405 return srv.serveTCP(l)
407 return &Error{err: "bad listeners"}
410 // Shutdown shuts down a server. After a call to Shutdown, ListenAndServe and
411 // ActivateAndServe will return.
412 func (srv *Server) Shutdown() error {
413 return srv.ShutdownContext(context.Background())
416 // ShutdownContext shuts down a server. After a call to ShutdownContext,
417 // ListenAndServe and ActivateAndServe will return.
419 // A context.Context may be passed to limit how long to wait for connections
421 func (srv *Server) ShutdownContext(ctx context.Context) error {
425 return &Error{err: "server not started"}
430 if srv.PacketConn != nil {
431 srv.PacketConn.SetReadDeadline(aLongTimeAgo) // Unblock reads
434 if srv.Listener != nil {
438 for rw := range srv.conns {
439 rw.SetReadDeadline(aLongTimeAgo) // Unblock reads
444 if testShutdownNotify != nil {
445 testShutdownNotify.Broadcast()
455 if srv.PacketConn != nil {
456 srv.PacketConn.Close()
462 var testShutdownNotify *sync.Cond
464 // getReadTimeout is a helper func to use system timeout if server did not intend to change it.
465 func (srv *Server) getReadTimeout() time.Duration {
466 if srv.ReadTimeout != 0 {
467 return srv.ReadTimeout
472 // serveTCP starts a TCP listener for the server.
473 func (srv *Server) serveTCP(l net.Listener) error {
476 if srv.NotifyStartedFunc != nil {
477 srv.NotifyStartedFunc()
480 var wg sync.WaitGroup
486 for srv.isStarted() {
487 rw, err := l.Accept()
489 if !srv.isStarted() {
492 if neterr, ok := err.(net.Error); ok && neterr.Temporary() {
498 // Track the connection to allow unblocking reads on shutdown.
499 srv.conns[rw] = struct{}{}
502 srv.spawnWorker(&response{
503 tsigSecret: srv.TsigSecret,
512 // serveUDP starts a UDP listener for the server.
513 func (srv *Server) serveUDP(l *net.UDPConn) error {
516 if srv.NotifyStartedFunc != nil {
517 srv.NotifyStartedFunc()
520 reader := Reader(defaultReader{srv})
521 if srv.DecorateReader != nil {
522 reader = srv.DecorateReader(reader)
525 var wg sync.WaitGroup
531 rtimeout := srv.getReadTimeout()
532 // deadline is not used here
533 for srv.isStarted() {
534 m, s, err := reader.ReadUDP(l, rtimeout)
536 if !srv.isStarted() {
539 if netErr, ok := err.(net.Error); ok && netErr.Temporary() {
544 if len(m) < headerSize {
545 if cap(m) == srv.UDPSize {
546 srv.udpPool.Put(m[:srv.UDPSize])
551 srv.spawnWorker(&response{
553 tsigSecret: srv.TsigSecret,
563 func (srv *Server) serve(w *response) {
564 if srv.DecorateWriter != nil {
565 w.writer = srv.DecorateWriter(w)
584 delete(srv.conns, w.tcp)
590 reader := Reader(defaultReader{srv})
591 if srv.DecorateReader != nil {
592 reader = srv.DecorateReader(reader)
595 idleTimeout := tcpIdleTimeout
596 if srv.IdleTimeout != nil {
597 idleTimeout = srv.IdleTimeout()
600 timeout := srv.getReadTimeout()
602 limit := srv.MaxTCPQueries
604 limit = maxTCPQueries
607 for q := 0; (q < limit || limit == -1) && srv.isStarted(); q++ {
609 w.msg, err = reader.ReadTCP(w.tcp, timeout)
611 // TODO(tmthrgd): handle error
616 break // Close() was called
619 break // client will call Close() themselves
621 // The first read uses the read timeout, the rest use the
623 timeout = idleTimeout
627 func (srv *Server) disposeBuffer(w *response) {
628 if w.udp != nil && cap(w.msg) == srv.UDPSize {
629 srv.udpPool.Put(w.msg[:srv.UDPSize])
634 func (srv *Server) serveDNS(w *response) {
635 dh, off, err := unpackMsgHdr(w.msg, 0)
637 // Let client hang, they are sending crap; any reply can be used to amplify.
644 switch srv.MsgAcceptFunc(dh) {
649 req.SetRcodeFormatError(req)
650 // Are we allowed to delete any OPT records here?
651 req.Ns, req.Answer, req.Extra = nil, nil, nil
658 if err := req.unpack(dh, w.msg, off); err != nil {
659 req.SetRcodeFormatError(req)
660 req.Ns, req.Answer, req.Extra = nil, nil, nil
668 if w.tsigSecret != nil {
669 if t := req.IsTsig(); t != nil {
670 if secret, ok := w.tsigSecret[t.Hdr.Name]; ok {
671 w.tsigStatus = TsigVerify(w.msg, secret, "", false)
673 w.tsigStatus = ErrSecret
675 w.tsigTimersOnly = false
676 w.tsigRequestMAC = req.Extra[len(req.Extra)-1].(*TSIG).MAC
682 handler := srv.Handler
684 handler = DefaultServeMux
687 handler.ServeDNS(w, req) // Writes back to the client
690 func (srv *Server) readTCP(conn net.Conn, timeout time.Duration) ([]byte, error) {
691 // If we race with ShutdownContext, the read deadline may
692 // have been set in the distant past to unblock the read
693 // below. We must not override it, otherwise we may block
697 conn.SetReadDeadline(time.Now().Add(timeout))
702 n, err := conn.Read(l)
703 if err != nil || n != 2 {
707 return nil, ErrShortRead
709 length := binary.BigEndian.Uint16(l)
711 return nil, ErrShortRead
713 m := make([]byte, int(length))
714 n, err = conn.Read(m[:int(length)])
715 if err != nil || n == 0 {
719 return nil, ErrShortRead
722 for i < int(length) {
723 j, err := conn.Read(m[i:int(length)])
734 func (srv *Server) readUDP(conn *net.UDPConn, timeout time.Duration) ([]byte, *SessionUDP, error) {
737 // See the comment in readTCP above.
738 conn.SetReadDeadline(time.Now().Add(timeout))
742 m := srv.udpPool.Get().([]byte)
743 n, s, err := ReadFromSessionUDP(conn, m)
752 // WriteMsg implements the ResponseWriter.WriteMsg method.
753 func (w *response) WriteMsg(m *Msg) (err error) {
755 return &Error{err: "WriteMsg called after Close"}
759 if w.tsigSecret != nil { // if no secrets, don't check for the tsig (which is a longer check)
760 if t := m.IsTsig(); t != nil {
761 data, w.tsigRequestMAC, err = TsigGenerate(m, w.tsigSecret[t.Hdr.Name], w.tsigRequestMAC, w.tsigTimersOnly)
765 _, err = w.writer.Write(data)
773 _, err = w.writer.Write(data)
777 // Write implements the ResponseWriter.Write method.
778 func (w *response) Write(m []byte) (int, error) {
780 return 0, &Error{err: "Write called after Close"}
785 return WriteToSessionUDP(w.udp, m, w.udpSession)
789 return 0, io.ErrShortBuffer
792 return 0, &Error{err: "message too large"}
794 l := make([]byte, 2, 2+lm)
795 binary.BigEndian.PutUint16(l, uint16(lm))
798 n, err := io.Copy(w.tcp, bytes.NewReader(m))
801 panic("dns: internal error: udp and tcp both nil")
805 // LocalAddr implements the ResponseWriter.LocalAddr method.
806 func (w *response) LocalAddr() net.Addr {
809 return w.udp.LocalAddr()
811 return w.tcp.LocalAddr()
813 panic("dns: internal error: udp and tcp both nil")
817 // RemoteAddr implements the ResponseWriter.RemoteAddr method.
818 func (w *response) RemoteAddr() net.Addr {
820 case w.udpSession != nil:
821 return w.udpSession.RemoteAddr()
823 return w.tcp.RemoteAddr()
825 panic("dns: internal error: udpSession and tcp both nil")
829 // TsigStatus implements the ResponseWriter.TsigStatus method.
830 func (w *response) TsigStatus() error { return w.tsigStatus }
832 // TsigTimersOnly implements the ResponseWriter.TsigTimersOnly method.
833 func (w *response) TsigTimersOnly(b bool) { w.tsigTimersOnly = b }
835 // Hijack implements the ResponseWriter.Hijack method.
836 func (w *response) Hijack() { w.hijacked = true }
838 // Close implements the ResponseWriter.Close method
839 func (w *response) Close() error {
841 return &Error{err: "connection already closed"}
847 // Can't close the udp conn, as that is actually the listener.
852 panic("dns: internal error: udp and tcp both nil")
856 // ConnectionState() implements the ConnectionStater.ConnectionState() interface.
857 func (w *response) ConnectionState() *tls.ConnectionState {
858 type tlsConnectionStater interface {
859 ConnectionState() tls.ConnectionState
861 if v, ok := w.tcp.(tlsConnectionStater); ok {
862 t := v.ConnectionState()