(load "mm") (in-package :mm) ;;======================================================================= ;; File: train-biases.lisp ;; First three "biased choosers" plus data generation functions ;; for Mastermind course project, CMSC 471, Fall 2011 ;; (c) Marie desJardins, November 2011 ;; VERSION 0.0: LAST UPDATED 11/6/11 ;; WARNING: This code is woefully underdocumented and ;; hastily implemented! ;; Biased learners only operate with 8 pegs and 8 colors. ;; Brittleness alert! - code looks general but may not work ;; with a different number of pegs and/or colors. (setf *code-length* 8) (setf *colors* '(r o y g b i v w)) (export '(train-bias1-pos train-bias1-neg train-bias2-pos train-bias2-neg check-train-bias2 train-bias3-pos train-bias3-neg check-train-bias3 train-bias-flag gen-instances print-to-file generate-training-data)) ;; TRAINING BIAS #1: Only generate codes that use the ;; latter half of the color list (in this case: colors 5 through 8) ;; Notice that to learn this bias, the only feature that is ;; necessary is a set of boolean features has-color(i), for ;; each color i. (defun train-bias1-pos (&optional (colors *colors*) (code-length *code-length*)) "Generate a positive training example for test bias #1 (only use colors 5 through 8)" (setf *code* (loop for i from 1 to code-length collect (nth (+ (random (/ (length colors) 2)) (/ (length colors) 2)) colors)))) (defun train-bias1-neg (&optional (colors *colors*) (code-length *code-length*)) "Generate a negative training example for test bias #1 (i.e., NEVER use colors 5 through 8)" (setf *code* (loop for i from 1 to code-length collect (nth (random (/ (length colors) 2)) colors)))) ;; TRAINING BIAS #2: Use each color that appears in the ;; code *exactly twice*. Notice that this bias implicitly ;; assumes that the code length is even, and that there are ;; at least half as many colors as the length of the code. ;; Notice also that the has_color(i) feature set of training ;; bias #1 will not be adequate to represent this bias. ;; Here you will want to use something like n_color(i), but ;; the learned decision tree will still be rather complex. (defun train-bias2-pos (&optional (colors *colors*) (code-length *code-length*)) (train-bias-flag colors code-length #'check-train-bias2 t)) (defun train-bias2-neg (&optional (colors *colors*) (code-length *code-length*)) (train-bias-flag colors code-length #'check-train-bias2 nil)) (defun check-train-bias2 (code flag) (loop for c in code always (let ((count (count-if #'(lambda (x) (eq c x)) code))) (if flag (member count '(0 2)) (not (member count '(0 2))))))) ;; TRAINING BIAS #3: Use exactly 3 different colors. ;; What's a reasonable feature set for this bias? (defun train-bias3-pos (&optional (colors *colors*) (code-length *code-length*)) (train-bias-flag colors code-length #'check-train-bias3 t)) (defun train-bias3-neg (&optional (colors *colors*) (code-length *code-length*)) (train-bias-flag colors code-length #'check-train-bias3 nil)) (defun check-train-bias3 (code flag &aux (colors nil)) (loop for c in code do (setf colors (adjoin c colors))) (if flag (eq (length colors) 3) (not (eq (length colors) 3)))) ;; TRAIN-BIAS-FLAG (colors code-length test flag) - depending ;; on whether FLAG is T or NIL, return a positive or negative ;; instance of the bias represented by the TEST function. (defun train-bias-flag (colors code-length test flag) "Generate random codes until finding one that does (if flag=T) or does not (flag=NIL) match the conditions of the test function" (loop while t do (progn (setf *code* (mm-gen-random colors code-length)) (if (funcall test *code* flag) (return-from train-bias-flag *code*))))) (defun gen-instances (n posgen neggen outfile &optional (colors *colors*) (code-length *code-length*)) "Generate n positive instances and n negative instances, interleaved, using the provided generation functions." (with-open-file (*standard-output* outfile :direction :output :if-exists :supersede) (loop for i from 1 to n do (progn (print-to-file "+" (funcall posgen colors code-length)) (print-to-file "-" (funcall neggen colors code-length)))))) (defun print-to-file (label code) (format t "~a " label) (loop for c in code do (format t "~s " c)) (terpri)) (defun generate-training-data (&optional (n 100) (posgens (list #'train-bias1-pos #'train-bias2-pos #'train-bias3-pos)) (neggens (list #'train-bias1-neg #'train-bias2-neg #'train-bias3-neg)) (outfiles (list "train-bias1.txt" "train-bias2.txt" "train-bias3.txt"))) "Generate n (default 100) instances with the specified positive-instance and negative-instance generation functions into the specified output files." (loop for pos in posgens for neg in neggens for outfile in outfiles do (gen-instances n pos neg outfile))) ;; TRAINING BIAS #4: Prefer codes with fewer colors. ;; Specifically, for a code of length N, the probability that a randomly ;; chosen code has K different colors is: ;; p(- | K) = (K/N) * (2/(N+1)) ;; So: ;; p(- | N) = 2 / (N+1) ;; p(- | 1) = 2 / (N(N+1)) ;; Codes consistent with this probability distribution are ;; generated by randomly generating a number in the range ;; [0,1], and using the cumulative probability distribution ;; function to map this probability to a number of colors in [1,N]. ;; ;; Note that this bias is only meaningful when there are at ;; least as many colors as pegs. ;; ;; Negative instances are generated by using the inverse ;; cumulative probability distribution function (i.e., using ;; (1-p) for each probability. (defun train-bias4-pos (&optional (colors *colors*) (code-length *code-length*)) "just use the code written for test bias 4, with the sign switched" (setf numcolors (bias4-colors code-length nil)) (generate-color-code colors numcolors code-length)) (defun train-bias4-neg (&optional (colors *colors*) (code-length *code-length*)) (setf numcolors (bias4-colors code-length t)) (generate-color-code colors numcolors code-length)) (defun generate-bias4-training-data (&optional (n 1000)) (gen-instances n #'train-bias4-pos #'train-bias4-neg "train-bias4.txt")) ;; Generate a code of length CODE-LENGTH using exactly NUMCOLORS ;; different colors from COLORS (defun generate-color-code (colors numcolors code-length) (let ((color-set (select-random numcolors colors)) (code nil)) ;; potentially inefficient generate-and-test approach ;; to make sure all colors are included (loop while (not (eq (count-colors code) numcolors)) do (setf code (mm-gen-random color-set code-length))) code)) (defun count-colors (code) (let ((colors nil)) (loop for c in code do (setf colors (adjoin c colors))) (length colors))) (defun bias4-colors (code-length posflag) (let ((p (random 1.0)) (cumprob 0) (pnext 0) (norm (* (/ 1 code-length) (/ 2 (+ 1 code-length))))) (if posflag (loop for i from code-length downto 1 do (progn (setf pnext (* i norm)) (incf cumprob pnext) (if (> cumprob p) (return-from bias4-colors i)))) (loop for i from 1 to code-length do (progn (setf pnext (* i norm)) (incf cumprob pnext) (if (> cumprob p) (return-from bias4-colors i))))))) (defun select-random (numcolors colors) (let ((color-set nil)) (loop while (< (length color-set) numcolors) do (setf color-set (adjoin (nth (random (length colors)) colors) color-set))) color-set))