1. Test Modules
  2. Training Characteristics
    1. Input Learning
      1. Gradient Descent
      2. Conjugate Gradient Descent
      3. Limited-Memory BFGS
    2. Results
  3. Results

Subreport: Logs for com.simiacryptus.ref.lang.ReferenceCountingBase

Test Modules

Using Seed 4539843012324434944

Training Characteristics

Input Learning

In this test, we use a network to learn this target input, given its pre-evaluated output:

TrainingTester.java:332 executed in 0.05 seconds (0.000 gc):

    return RefArrays.stream(RefUtil.addRef(input_target)).flatMap(RefArrays::stream).map(x -> {
      try {
        return x.prettyPrint();
      } finally {
        x.freeRef();
      }
    }).reduce((a, b) -> a + "\n" + b).orElse("");

Returns

    [
    	[ [ 0.468, -1.7, 1.96 ], [ -0.872, -1.02, 0.744 ], [ -1.936, 0.992, -1.608 ], [ -1.024, -0.544, 1.124 ], [ -1.316, 1.116, 0.856 ], [ 1.756, -1.296, -0.344 ], [ -1.428, 0.728, 1.412 ], [ -1.508, 1.292, 0.18 ], ... ],
    	[ [ 1.088, -1.856, 1.808 ], [ 0.912, -0.976, 1.416 ], [ -0.076, -0.04, -0.424 ], [ -1.868, 0.64, 0.884 ], [ -0.528, 0.472, 1.244 ], [ 1.708, -0.316, -0.28 ], [ 1.06, -0.308, -0.66 ], [ -1.472, -1.684, -0.204 ], ... ],
    	[ [ -0.404, 1.508, -0.02 ], [ -1.608, -0.764, -0.344 ], [ -0.984, 0.104, 1.828 ], [ 0.004, -0.972, -1.488 ], [ 1.368, -1.8, -1.272 ], [ 0.764, 1.32, -1.184 ], [ -1.104, -0.648, 1.472 ], [ -0.444, 0.516, -0.548 ], ... ],
    	[ [ -1.572, 0.576, -1.796 ], [ 1.332, 1.016, 0.164 ], [ -0.684, 1.756, -0.332 ], [ 1.844, 1.048, -1.44 ], [ -1.912, -1.14, -1.968 ], [ 1.432, 1.536, 0.288 ], [ -0.524, -0.788, 0.468 ], [ 0.728, 0.336, 1.084 ], ... ],
    	[ [ 1.484, 0.572, 1.728 ], [ -0.72, 1.616, -1.22 ], [ -0.56, 1.632, 1.848 ], [ -1.676, -1.2, -1.732 ], [ -0.256, -1.968, -1.472 ], [ 1.7, -0.876, 1.236 ], [ -0.304, 0.772, 0.708 ], [ 1.876, 0.944, 1.972 ], ... ],
    	[ [ -0.14, 1.148, -0.692 ], [ 0.0, 0.712, 1.392 ], [ -1.984, 0.096, -0.448 ], [ 1.108, 0.696, 1.08 ], [ -1.6, -0.648, 0.656 ], [ -1.624, 1.636, -0.98 ], [ -0.34, -1.036, -0.08 ], [ -1.556, -0.952, -0.508 ], ... ],
    	[ [ -1.236, -1.536, 0.036 ], [ -0.604, 0.608, -1.82 ], [ -0.672, 1.02, 1.86 ], [ 0.784, 1.148, 0.9 ], [ 1.152, -1.004, -1.944 ], [ 1.18, -1.436, 0.68 ], [ 1.512, 0.52, 1.928 ], [ -1.388, -1.628, -1.688 ], ... ],
    	[ [ 1.24, 1.704, -1.592 ], [ -0.484, -0.04, 1.732 ], [ 0.488, 1.652, -0.184 ], [ 1.624, -1.556, -1.952 ], [ -0.744, -0.74, -0.172 ], [ -0.892, 0.724, 1.212 ], [ 1.644, -0.828, -1.892 ], [ -0.1, -0.712, 1.836 ], ... ],
    	...
    ]
    [
    	[ [ -1.012, 0.092, -0.96 ], [ 0.636, 1.416, -0.456 ], [ 0.508, -0.032, 1.692 ], [ 0.6, 1.632, -1.408 ], [ -1.424, -1.752, -0.228 ], [ -1.34, 1.292, -0.712 ], [ 0.76, 0.136, -1.256 ], [ 1.9, -0.068, -0.012 ], ... ],
    	[ [ -0.004, 1.82, 0.092 ], [ -0.944, 0.524, 1.292 ], [ -0.724, 1.176, 0.624 ], [ 1.036, -1.98, 0.492 ], [ -0.784, -1.332, -1.628 ], [ 0.608, 0.38, 0.748 ], [ 0.68, 0.992, -1.504 ], [ -0.74, -1.456, 1.928 ], ... ],
    	[ [ -0.668, -1.6, 1.144 ], [ -1.94, 0.38, 0.168 ], [ 1.292, 1.392, 1.756 ], [ 1.228, -0.552, 0.104 ], [ 1.172, 0.084, 1.148 ], [ -1.272, 0.884, -0.596 ], [ -0.424, 0.616, 1.228 ], [ 1.324, 1.4, -1.604 ], ... ],
    	[ [ 0.036, 0.688, -1.36 ], [ -1.136, 1.616, 0.028 ], [ 1.004, 1.828, -0.6 ], [ -1.784, 1.372, 1.908 ], [ 0.076, 1.816, 0.768 ], [ -1.812, 0.964, 0.956 ], [ 0.988, -0.848, -1.928 ], [ 1.992, -0.872, -1.544 ], ... ],
    	[ [ 1.32, 0.464, -1.508 ], [ 1.372, 0.904, -1.824 ], [ 0.336, -1.796, -1.408 ], [ -0.984, -0.524, 0.2 ], [ 1.764, 1.808, 0.696 ], [ 1.288, 0.624, -1.376 ], [ 1.88, -1.24, -0.968 ], [ -0.496, -1.368, 0.26 ], ... ],
    	[ [ -1.86, 0.292, 0.056 ], [ 0.036, 0.368, 1.04 ], [ 0.34, -1.788, -1.22 ], [ -0.104, 1.892, -1.28 ], [ 0.24, 0.012, -1.22 ], [ -1.44, -1.136, -0.804 ], [ -0.152, 1.768, -0.784 ], [ -0.516, 1.144, 1.336 ], ... ],
    	[ [ 1.98, 1.78, 0.152 ], [ -0.02, 1.82, -0.228 ], [ -0.596, 0.8, 1.652 ], [ 1.42, 0.44, -0.856 ], [ -1.996, 1.772, 1.952 ], [ 1.484, 0.516, 0.484 ], [ -0.812, -1.156, -0.396 ], [ 1.744, -0.576, -1.716 ], ... ],
    	[ [ 0.4, -0.784, 0.336 ], [ 1.992, -0.268, 1.104 ], [ -1.312, 0.188, -1.108 ], [ -0.612, -1.468, -1.688 ], [ 0.976, -1.496, -0.1 ], [ 1.048, -0.72, 1.064 ], [ -0.604, -1.352, -1.176 ], [ 0.884, -0.74, -1.212 ], ... ],
    	...
    ]
    [
    	[ [ 1.392, -1.848, -0.428 ], [ -0.08, 1.804, -0.26 ], [ -1.74, -0.564, 1.028 ], [ 1.328, -0.68, 1.516 ], [ -1.276, 1.088, 0.048 ], [ 1.024, -0.456, -0.62 ], [ -1.26, -0.616, -1.956 ], [ -0.748, -0.108, -1.036 ], ... ],
    	[ [ -1.14, 0.544, -1.504 ], [ 0.964, -1.452, -1.252 ], [ 0.624, -0.052, 1.184 ], [ 1.748, -1.968, -1.948 ], [ 0.8, 1.26, -0.232 ], [ 0.044, 1.684, -1.1 ], [ 0.7, -0.456, 0.464 ], [ -0.928, -1.452, 0.328 ], ... ],
    	[ [ 1.264, -0.596, 0.172 ], [ -0.588, -1.064, -1.312 ], [ -1.016, 0.296, -1.008 ], [ -0.44, 0.044, -0.52 ], [ -1.068, -1.024, 1.84 ], [ 0.428, -0.636, -1.032 ], [ 1.092, 0.552, 0.056 ], [ -1.996, -1.788, 0.336 ], ... ],
    	[ [ 0.72, 0.872, -0.444 ], [ -1.396, 0.148, 0.508 ], [ -1.88, 0.064, 0.26 ], [ 0.904, -0.744, 1.672 ], [ -0.212, -0.608, 1.488 ], [ 0.028, -1.288, -1.872 ], [ -1.836, 0.072, 0.092 ], [ -0.5, 0.396, 0.984 ], ... ],
    	[ [ 1.032, -0.796, 0.168 ], [ -0.088, -0.7, 1.696 ], [ -0.068, -0.044, -0.228 ], [ -1.816, 0.952, -0.1 ], [ -0.912, 1.216, -1.204 ], [ -1.508, -0.724, 1.836 ], [ 0.752, -0.808, 0.5 ], [ 1.816, 0.128, 1.664 ], ... ],
    	[ [ 0.716, 0.932, -0.2 ], [ 0.14, 1.544, -0.764 ], [ 1.48, 1.748, 0.656 ], [ -1.364, -1.12, 0.104 ], [ 1.948, 1.74, -0.304 ], [ -1.832, -0.648, -0.856 ], [ -0.052, -1.656, -0.18 ], [ 0.18, -0.668, 1.352 ], ... ],
    	[ [ 0.804, 0.828, 1.636 ], [ -1.736, 0.408, -0.288 ], [ 0.584, -1.16, -1.772 ], [ -0.628, 1.624, -0.736 ], [ 0.476, -0.352, 0.392 ], [ 0.572, 0.876, 0.0 ], [ 1.536, -1.66, -0.92 ], [ 0.68, 0.828, -1.908 ], ... ],
    	[ [ -0.736, -1.012, -0.308 ], [ 1.256, -0.02, 1.528 ], [ -1.004, -0.04, -1.86 ], [ -1.7, 1.72, 0.496 ], [ -1.724, -0.496, 0.384 ], [ 0.64, -0.088, 0.076 ], [ -1.68, 0.632, 1.164 ], [ -0.224, -0.684, -1.348 ], ... ],
    	...
    ]
    [
    	[ [ 0.736, -0.104, 0.412 ], [ -1.756, -1.848, 1.496 ], [ -0.596, 0.444, 0.088 ], [ 1.304, 0.472, 0.176 ], [ -0.36, -0.056, -1.772 ], [ -0.908, -1.476, 1.62 ], [ 0.62, -1.008, -0.236 ], [ 0.444, -0.792, 1.036 ], ... ],
    	[ [ 1.136, -1.736, 0.784 ], [ 1.868, 1.176, 0.144 ], [ -1.0, -1.492, 0.832 ], [ -1.988, 0.708, 1.3 ], [ 0.836, -1.54, -0.108 ], [ 1.36, -1.868, -0.256 ], [ 0.768, -0.444, -1.832 ], [ 0.116, -0.828, -0.368 ], ... ],
    	[ [ -0.712, 1.984, -0.324 ], [ -0.504, 0.824, -1.168 ], [ -0.5, 1.892, 0.192 ], [ 0.216, 0.452, 0.708 ], [ -0.412, -0.928, 1.28 ], [ 1.476, 1.7, -1.992 ], [ -1.32, 0.756, -1.392 ], [ -0.364, -1.344, 1.0 ], ... ],
    	[ [ 1.148, 0.64, -1.004 ], [ -0.28, -0.384, -1.684 ], [ -0.476, -1.96, -1.304 ], [ 1.876, -1.364, 1.38 ], [ 1.664, 0.82, 1.464 ], [ 0.16, 1.376, 0.064 ], [ -0.644, -0.672, 0.588 ], [ -0.032, 0.768, -0.156 ], ... ],
    	[ [ -0.956, 1.42, -0.848 ], [ 0.932, 0.976, 0.956 ], [ 0.016, 0.94, 1.448 ], [ -0.876, -1.112, 1.66 ], [ -0.832, -1.744, 0.22 ], [ -1.216, 1.66, 0.22 ], [ -1.32, 1.052, 0.0 ], [ -0.956, 1.54, -0.06 ], ... ],
    	[ [ -0.232, 0.144, -1.692 ], [ 0.256, 0.652, 1.692 ], [ 0.204, 1.492, -0.62 ], [ -0.12, -1.676, 0.8 ], [ -1.796, -0.26, 1.484 ], [ -0.128, 1.096, -0.236 ], [ 0.036, 0.064, -0.468 ], [ 0.56, -0.32, 1.252 ], ... ],
    	[ [ 1.62, 0.22, 0.848 ], [ 1.152, 1.26, -1.76 ], [ 0.732, -1.584, 1.324 ], [ -0.02, 0.752, -1.948 ], [ 0.48, -1.452, -1.128 ], [ 0.012, 0.86, 1.46 ], [ -1.348, 0.456, 0.984 ], [ -1.456, 1.68, 1.296 ], ... ],
    	[ [ 0.904, -1.82, 1.104 ], [ 1.112, -0.96, -0.248 ], [ -0.964, 1.036, 1.884 ], [ -0.092, 1.268, -0.68 ], [ 0.336, -0.964, 0.74 ], [ 0.68, 0.412, -1.932 ], [ 1.708, 0.652, -1.276 ], [ -1.572, -0.084, -1.18 ], ... ],
    	...
    ]
    [
    	[ [ 1.756, 0.996, 1.408 ], [ -0.816, 0.584, 0.88 ], [ 0.336, -1.432, -0.324 ], [ 0.928, 0.124, 0.272 ], [ -0.728, -1.5, 1.716 ], [ -1.3, 0.616, 0.072 ], [ -1.1, 1.272, -0.284 ], [ 0.396, -0.972, 0.044 ], ... ],
    	[ [ 1.08, 1.972, -1.528 ], [ 0.68, 1.872, -0.62 ], [ 0.048, 1.708, 1.94 ], [ -0.576, 1.62, -1.44 ], [ -0.26, -1.276, 1.916 ], [ -0.844, -1.44, -0.672 ], [ 1.04, -0.492, -0.204 ], [ -0.552, 1.256, -0.192 ], ... ],
    	[ [ -1.656, -0.084, -1.416 ], [ 0.656, -1.472, -1.22 ], [ 1.24, 1.016, -0.604 ], [ 1.068, 1.236, -1.272 ], [ -1.564, -1.148, 0.516 ], [ -1.236, -1.7, -0.608 ], [ 0.82, 0.964, 1.572 ], [ 0.42, -1.184, -0.908 ], ... ],
    	[ [ 0.092, -0.232, 0.188 ], [ 0.752, -1.904, -1.764 ], [ -1.456, -1.988, -1.368 ], [ -1.368, -1.544, -1.632 ], [ 1.84, 0.912, 0.608 ], [ -0.924, -1.104, -0.452 ], [ -1.896, 0.08, 1.78 ], [ -0.756, 1.864, 1.036 ], ... ],
    	[ [ 1.936, -0.62, -0.528 ], [ -0.932, -1.164, 0.924 ], [ 0.656, 0.652, -1.34 ], [ 0.4, 1.672, 0.908 ], [ -1.704, -0.984, 0.052 ], [ -1.94, 0.656, -0.7 ], [ 0.46, 0.508, 0.448 ], [ -0.236, 1.54, -1.132 ], ... ],
    	[ [ -1.916, -0.368, 0.676 ], [ -1.7, -0.052, -0.328 ], [ -1.32, 0.472, -0.916 ], [ 0.88, 1.864, 0.584 ], [ 0.624, 0.2, 0.748 ], [ 0.892, -1.684, 0.792 ], [ -1.948, -1.724, 0.184 ], [ -0.544, -0.572, -1.5 ], ... ],
    	[ [ -0.824, 0.976, 1.916 ], [ 1.364, -0.828, 0.664 ], [ 1.18, -1.984, -0.912 ], [ -0.728, -1.024, -0.656 ], [ -0.984, 1.736, -0.68 ], [ -1.24, 0.136, -1.98 ], [ -0.876, 1.852, -0.868 ], [ 1.66, -1.184, 0.864 ], ... ],
    	[ [ -1.212, 0.876, -0.716 ], [ 1.596, -1.624, -0.54 ], [ -0.12, -0.592, 0.02 ], [ -0.776, 1.716, 1.34 ], [ 1.532, 0.288, 1.176 ], [ 0.616, -0.868, 1.224 ], [ -0.26, -0.172, 0.208 ], [ -1.06, -0.128, 0.692 ], ... ],
    	...
    ]

Gradient Descent

First, we train using a basic gradient descent method with weak line search conditions.

TrainingTester.java:480 executed in 34.12 seconds (4.316 gc):

    IterativeTrainer iterativeTrainer = new IterativeTrainer(trainable.addRef());
    try {
      iterativeTrainer.setLineSearchFactory(label -> new ArmijoWolfeSearch());
      iterativeTrainer.setOrientation(new GradientDescent());
      iterativeTrainer.setMonitor(TrainingTester.getMonitor(history));
      iterativeTrainer.setTimeout(30, TimeUnit.SECONDS);
      iterativeTrainer.setMaxIterations(250);
      iterativeTrainer.setTerminateThreshold(0);
      return iterativeTrainer.run();
    } finally {
      iterativeTrainer.freeRef();
    }
Logging
Reset training subject: 341990457166
Reset training subject: 342924602998
Low gradient: 1.3696881503722907E-9
Constructing line search parameters: GD
New Minimum: 1.3642420526593923E-13 > 0.0
th(0)=0.0;dx=-1.8760456292701497E-18
Armijo: th(2.154434690031884)=1.3642420526593923E-13; dx=6.984205408973978E-12 evalInputDelta=-1.3642420526593923E-13
New Minimum: 0.0 > -1.015905172607745E-6
WOLF (strong): th(1.077217345015942)=-1.015905172607745E-6; dx=3.492192678742288E-12 evalInputDelta=1.015905172607745E-6
WOLF (strong): th(0.3590724483386473)=-2.539903562137624E-7; dx=1.163840032838606E-12 evalInputDelta=2.539903562137624E-7
WOLF (strong): th(0.08976811208466183)=-8.460970093437937E-8; dx=2.9096526113742574E-13 evalInputDelta=8.460970093437937E-8
WOLF (strong): th(0.017953622416932366)=-2.111567027895944E-8; dx=5.819875004606557E-14 evalInputDelta=2.111567027895944E-8
WOLF (strong): th(0.002992270402822061)=-4.125604391447268E-9; dx=9.781308834787404E-15 evalInputDelta=4.125604391447268E-9
WOLF (strong): th(4.2746720040315154E-4)=-7.527660272899083E-10; dx=1.6102903655584284E-15 evalInputDelta=7.527660272899083E-10
WOLF (strong): th(5.343340005039394E-5)=-1.4340457710204647E-10; dx=2.3906181447275565E-17 evalInputDelta=1.4340457710204647E-10
END: th(5.9370444500437714E-6)=-2.4101609596982597E-12; dx=-1.6080391108030885E-18 evalInputDelta=2.4101609596982597E-12
Fitness changed from 1.3642420526593923E-13 to -1.015905172607745E-6
Iteration 1 complete. Error: -1.015905172607745E-6 Total: 14.5446; Orientation: 0.7061; Line Search: 12.1719
New Minimum: 4.092726157978177E-13 > -2.539903562137624E-7
th(0)=-2.539903562137624E-7;dx=-6.5005933684566276E-6
New Minimum: -2.539903562137624E-7 > -5.079807124275248E-7
WOLF (strong): th(1.279097451943557E-5)=-5.079807124275248E-7; dx=1.3718092862762639E-4 evalInputDelta=2.539903562137624E-7
Armijo: th(6.395487259717785E-6)=5.359851184039144E-6; dx=6.534039553533346E-5 evalInputDelta=-5.613841540252906E-6
Armijo: th(2.131829086572595E-6)=2.552923456278222E-6; dx=1.7446633558220693E-5 evalInputDelta=-2.8069138124919846E-6
Armijo: th(5.329572716431488E-7)=6.816416998844943E-7; dx=-5.139610422737861E-7 evalInputDelta=-9.356320560982567E-7
Armijo: th(1.0659145432862975E-7)=6.816416998844943E-7; dx=-5.303374208204161E-6 evalInputDelta=-9.356320560982567E-7
Armijo: th(1.776524238810496E-8)=-2.0718889572890476E-7; dx=-6.300953236645186E-6 evalInputDelta=-4.6801460484857644E-8
Armijo: th(2.53789176972928E-9)=-2.4625960577395744E-7; dx=-6.471675611095958E-6 evalInputDelta=-7.730750439804968E-9
Armijo: th(3.1723647121616E-10)=-2.528504637666629E-7; dx=-6.49721670182097E-6 evalInputDelta=-1.139892447099513E-9
Armijo: th(3.5248496801795556E-11)=-2.538108901717351E-7; dx=-6.500496936658168E-6 evalInputDelta=-1.794660420273007E-10
WOLFE (weak): th(3.5248496801795555E-12)=-2.539935621825862E-7; dx=-6.500592304183135E-6 evalInputDelta=3.205968823770748E-12
WOLFE (weak): th(1.9386673240987555E-11)=-5.079807124275248E-7; dx=-6.500570487009466E-6 evalInputDelta=2.539903562137624E-7
WOLFE (weak): th(2.7317585021391554E-11)=-2.5399142487003703E-7; dx=-6.500554772096801E-6 evalInputDelta=1.0686562746255423E-12
WOLFE (weak): th(3.128304091159356E-11)=-2.5399037895112997E-7; dx=-6.50054165073306E-6 evalInputDelta=2.273736756549903E-14
Armijo: th(3.326576885669456E-11)=-2.5398769594175973E-7; dx=-6.500497585258291E-6 evalInputDelta=-2.660272002675227E-12
WOLFE (weak): th(3.227440488414406E-11)=-2.53993721344159E-7; dx=-6.50054165073306E-6 evalInputDelta=3.3651303965704224E-12
Armijo: th(3.277008687041931E-11)=-2.5398769594175973E-7; dx=-6.50054165073306E-6 evalInputDelta=-2.660272002675227E-12
Armijo: th(3.2522245877281687E-11)=-2.5398769594175973E-7; dx=-6.50054165073306E-6 evalInputDelta=-2.660272002675227E-12
mu ~= nu (3.227440488414406E-11): th(1.279097451943557E-5)=-2.5398769594175973E-7
Fitness changed from 4.092726157978177E-13 to -5.079807124275248E-7
Iteration 2 complete. Error: -5.079807124275248E-7 Total: 19.5637; Orientation: 0.7782; Line Search: 18.3474
Final threshold in iteration 2: -5.079807124275248E-7 (> 0.0) after 34.109s (< 30.000s)

Returns

    -5.079807124275248E-7

Training Converged

Conjugate Gradient Descent

Next, we use a conjugate gradient descent method, which converges the fastest for purely linear functions.

TrainingTester.java:452 executed in 50.32 seconds (1.997 gc):

    IterativeTrainer iterativeTrainer = new IterativeTrainer(trainable.addRef());
    try {
      iterativeTrainer.setLineSearchFactory(label -> new QuadraticSearch());
      iterativeTrainer.setOrientation(new GradientDescent());
      iterativeTrainer.setMonitor(TrainingTester.getMonitor(history));
      iterativeTrainer.setTimeout(30, TimeUnit.SECONDS);
      iterativeTrainer.setMaxIterations(250);
      iterativeTrainer.setTerminateThreshold(0);
      return iterativeTrainer.run();
    } finally {
      iterativeTrainer.freeRef();
    }
Logging
Reset training subject: 376213758363
Reset training subject: 376621831064
Low gradient: 1.369688150372291E-9
Constructing line search parameters: GD
F(0.0) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-1.8760456292701497E-18}
F(1.0E-10) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 0.0
F(7.000000000000001E-10) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 0.0
F(4.900000000000001E-9) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 0.0
F(3.430000000000001E-8) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-1.87604562927015E-18}, evalInputDelta = 0.0
F(2.4010000000000004E-7) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 0.0
F(1.6807000000000003E-6) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 0.0
F(1.1764900000000001E-5) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-9.29089264019264E-19}, evalInputDelta = 0.0
F(8.235430000000001E-5) = LineSearchPoint{point=PointSample{avg=2.955857780762017E-13}, derivative=7.27191020107757E-17}, evalInputDelta = 1.5916157281026246E-13
New Minimum: 1.3642420526593923E-13 > -5.638867150992155E-12
F(6.334946153846155E-6) = LineSearchPoint{point=PointSample{avg=-5.638867150992155E-12}, derivative=-1.6080391108030885E-18}, evalInputDelta = -5.775291356258095E-12
F(4.4344623076923086E-5) = LineSearchPoint{point=PointSample{avg=4.092726157978177E-13}, derivative=1.7259619789283964E-17}, evalInputDelta = 2.728484105318785E-13
F(3.4111248520710065E-6) = LineSearchPoint{point=PointSample{avg=-1.3642420526593924E-12}, derivative=-1.8760456292701497E-18}, evalInputDelta = -1.5006662579253315E-12
F(2.3877873964497047E-5) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=3.895028068390565E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.836759535730542E-6) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 0.0
F(1.2857316750113794E-5) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-7.146840492455431E-19}, evalInputDelta = 1.3642420526593923E-13
F(9.890243653933688E-7) = LineSearchPoint{point=PointSample{avg=2.0463630789890886E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 6.821210263296963E-14
F(7.607879733795145E-8) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(5.852215179842419E-9) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(4.5017039844941683E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(3.462849218841668E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(2.4239944531891677E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.8646111178378212E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.3052277824864749E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.87604562927015E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.0040213711434422E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(7.028149598004096E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(4.919704718602867E-10) = LineSearchPoint{point=PointSample{avg=1.3642420526593923E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 0.0
F(3.443793303022007E-9) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(2.64907177155539E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(2.037747516581069E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.4264232616067484E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.0972486627744219E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(7.680740639420954E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(5.376518447594668E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(4.135783421226668E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(2.8950483948586675E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(2.2269603037374367E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.87604562927015E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.5588722126162056E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.1991324712432351E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(8.393927298702646E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(5.875749109091853E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(4.5198070069937326E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(3.1638649048956127E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(2.4337422345350867E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.87604562927015E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.7036195641745607E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.3104765878265852E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(9.173336114786096E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(6.421335280350267E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(4.939488677192513E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(3.457642074034759E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(2.65972467233443E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.861807270634101E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.87604562927015E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.4321594389493084E-11) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(1.0025116072645159E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
F(7.711627748188584E-12) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
Loops = 52
F(1.0E-10) = LineSearchPoint{point=PointSample{avg=2.7284841053187846E-13}, derivative=-1.8760456292701497E-18}, evalInputDelta = 1.3642420526593923E-13
Right bracket at 1.0E-10
Converged to right
Fitness changed from 1.3642420526593923E-13 to -5.638867150992155E-12
Iteration 1 complete. Error: -5.638867150992155E-12 Total: 50.3194; Orientation: 0.7680; Line Search: 48.1055
Final threshold in iteration 1: -5.638867150992155E-12 (> 0.0) after 50.320s (< 30.000s)

Returns

    -5.638867150992155E-12

Training Converged

Limited-Memory BFGS

Next, we apply the same optimization using L-BFGS, which is nearly ideal for purely second-order or quadratic functions.

TrainingTester.java:509 executed in 40.10 seconds (2.043 gc):

    IterativeTrainer iterativeTrainer = new IterativeTrainer(trainable.addRef());
    try {
      iterativeTrainer.setLineSearchFactory(label -> new ArmijoWolfeSearch());
      iterativeTrainer.setOrientation(new LBFGS());
      iterativeTrainer.setMonitor(TrainingTester.getMonitor(history));
      iterativeTrainer.setTimeout(30, TimeUnit.SECONDS);
      iterativeTrainer.setIterationsPerSample(100);
      iterativeTrainer.setMaxIterations(250);
      iterativeTrainer.setTerminateThreshold(0);
      return iterativeTrainer.run();
    } finally {
      iterativeTrainer.freeRef();
    }
Logging
Reset training subject: 426656534442
Reset training subject: 427085112941
Adding measurement 5577d4cb to history. Total: 0
LBFGS Accumulation History: 1 points
Constructing line search parameters: GD
Non-optimal measurement 2.7284841053187846E-13 < 2.7284841053187846E-13. Total: 1
th(0)=2.7284841053187846E-13;dx=-1.8760456292701497E-18
Adding measurement 713289d7 to history. Total: 1
New Minimum: 2.7284841053187846E-13 > -697.4519999999997
WOLF (strong): th(2.154434690031884)=-697.4519999999997; dx=6.984205408973978E-12 evalInputDelta=697.4519999999999
Non-optimal measurement -1.015905172607745E-6 < -697.4519999999997. Total: 2
WOLF (strong): th(1.077217345015942)=-1.015905172607745E-6; dx=3.492192678742288E-12 evalInputDelta=1.0159054454561557E-6
Non-optimal measurement -5.079807124275248E-7 < -697.4519999999997. Total: 2
WOLF (strong): th(0.3590724483386473)=-5.079807124275248E-7; dx=1.163840032838606E-12 evalInputDelta=5.079809852759354E-7
Non-optimal measurement -1.6921940186875873E-7 < -697.4519999999997. Total: 2
WOLF (strong): th(0.08976811208466183)=-1.6921940186875873E-7; dx=2.9096526113742574E-13 evalInputDelta=1.6921967471716926E-7
Non-optimal measurement -4.223134055791888E-8 < -697.4519999999997. Total: 2
WOLF (strong): th(0.017953622416932366)=-4.223134055791888E-8; dx=5.819875004606557E-14 evalInputDelta=4.223161340632942E-8
Non-optimal measurement -8.251208782894536E-9 < -697.4519999999997. Total: 2
WOLF (strong): th(0.002992270402822061)=-8.251208782894536E-9; dx=9.781308834787404E-15 evalInputDelta=8.251481631305067E-9
Non-optimal measurement -1.5055320545798167E-9 < -697.4519999999997. Total: 2
WOLF (strong): th(4.2746720040315154E-4)=-1.5055320545798167E-9; dx=1.6102903655584284E-15 evalInputDelta=1.5058049029903485E-9
Non-optimal measurement -2.8680915420409294E-10 < -697.4519999999997. Total: 2
WOLF (strong): th(5.343340005039394E-5)=-2.8680915420409294E-10; dx=2.3906181447275565E-17 evalInputDelta=2.8708200261462483E-10
Non-optimal measurement -4.8203219193965195E-12 < -697.4519999999997. Total: 2
END: th(5.9370444500437714E-6)=-4.8203219193965195E-12; dx=-1.6080391108030885E-18 evalInputDelta=5.093170329928398E-12
Fitness changed from 2.7284841053187846E-13 to -697.4519999999997
Iteration 1 complete. Error: -697.4519999999997 Total: 13.9290; Orientation: 0.8683; Line Search: 11.7914
Non-optimal measurement 4.092726157978177E-13 < -697.4519999999997. Total: 2
LBFGS Accumulation History: 2 points
Non-optimal measurement -1.015905172607745E-6 < -697.4519999999997. Total: 2
New Minimum: 4.092726157978177E-13 > -1.015905172607745E-6
th(0)=-1.015905172607745E-6;dx=-2.6001033525165724E-5
Non-optimal measurement -1.015905172607745E-6 < -697.4519999999997. Total: 2
Armijo: th(1.279097451943557E-5)=-1.015905172607745E-6; dx=5.486959128098727E-4 evalInputDelta=0.0
Non-optimal measurement 2.143850406355341E-5 < -697.4519999999997. Total: 2
Armijo: th(6.395487259717785E-6)=2.143850406355341E-5; dx=2.6134716282915146E-4 evalInputDelta=-2.2454409236161155E-5
Non-optimal measurement 1.0211319431618903E-5 < -697.4519999999997. Total: 2
Armijo: th(2.131829086572595E-6)=1.0211319431618903E-5; dx=6.97828847434993E-5 evalInputDelta=-1.1227224604226649E-5
Non-optimal measurement 2.7265459266345717E-6 < -697.4519999999997. Total: 2
Armijo: th(5.329572716431488E-7)=2.7265459266345717E-6; dx=-2.0557089928633143E-6 evalInputDelta=-3.7424510992423166E-6
Non-optimal measurement -8.017814252525568E-8 < -697.4519999999997. Total: 2
Armijo: th(1.0659145432862975E-7)=-8.017814252525568E-8; dx=-2.1211514952526256E-5 evalInputDelta=-9.357270300824894E-7
Non-optimal measurement 1.0 < -697.4519999999997. Total: 2
Armijo: th(1.776524238810496E-8)=1.0; dx=-2.5203701578454876E-5 evalInputDelta=-1.0000010159051727
Non-optimal measurement -9.847540241025854E-7 < -697.4519999999997. Total: 2
Armijo: th(2.53789176972928E-9)=-9.847540241025854E-7; dx=-2.5887546066037018E-5 evalInputDelta=-3.1151148505159696E-8
Non-optimal measurement -1.0113911685039057E-6 < -697.4519999999997. Total: 2
Armijo: th(3.1723647121616E-10)=-1.0113911685039057E-6; dx=-2.598748697084262E-5 evalInputDelta=-4.514004103839355E-9
Non-optimal measurement -1.0152339655178367E-6 < -697.4519999999997. Total: 2
Armijo: th(3.5248496801795556E-11)=-1.0152339655178367E-6; dx=-2.6000274280183687E-5 evalInputDelta=-6.71207089908421E-10
Non-optimal measurement -1.0159303656109842E-6 < -697.4519999999997. Total: 2
New Minimum: -1.015905172607745E-6 > -1.0159303656109842E-6
WOLFE (weak): th(3.5248496801795555E-12)=-1.0159303656109842E-6; dx=-2.6001029035384246E-5 evalInputDelta=2.519300323906776E-11
Non-optimal measurement -1.01590512713301E-6 < -697.4519999999997. Total: 2
Armijo: th(1.9386673240987555E-11)=-1.01590512713301E-6; dx=-2.6000841194648208E-5 evalInputDelta=-4.547473513099806E-14
Non-optimal measurement -1.0159106750506909E-6 < -697.4519999999997. Total: 2
WOLFE (weak): th(1.1455761460583555E-11)=-1.0159106750506909E-6; dx=-2.600096371583415E-5 evalInputDelta=5.5024429457685675E-12
Non-optimal measurement -1.0159030352951958E-6 < -697.4519999999997. Total: 2
Armijo: th(1.5421217350785553E-11)=-1.0159030352951958E-6; dx=-2.6000933917152788E-5 evalInputDelta=-2.1373125492510847E-12
Non-optimal measurement -1.0158997611142696E-6 < -697.4519999999997. Total: 2
Armijo: th(1.3438489405684554E-11)=-1.0158997611142696E-6; dx=-2.6000953804961636E-5 evalInputDelta=-5.411493475506571E-12
Non-optimal measurement -1.0159034445678116E-6 < -697.4519999999997. Total: 2
Armijo: th(1.2447125433134055E-11)=-1.0159034445678116E-6; dx=-2.6000956266146053E-5 evalInputDelta=-1.7280399334956186E-12
Non-optimal measurement -1.0159037628909573E-6 < -697.4519999999997. Total: 2
Armijo: th(1.1951443446858805E-11)=-1.0159037628909573E-6; dx=-2.600096371583415E-5 evalInputDelta=-1.4097167877903904E-12
Non-optimal measurement -1.0159030352951958E-6 < -697.4519999999997. Total: 2
Armijo: th(1.1703602453721181E-11)=-1.0159030352951958E-6; dx=-2.600096371583415E-5 evalInputDelta=-2.1373125492510847E-12
Non-optimal measurement -1.0159030352951958E-6 < -697.4519999999997. Total: 2
Armijo: th(1.1579681957152368E-11)=-1.0159030352951958E-6; dx=-2.600096371583415E-5 evalInputDelta=-2.1373125492510847E-12
Non-optimal measurement -1.0159030352951958E-6 < -697.4519999999997. Total: 2
Armijo: th(1.1517721708867962E-11)=-1.0159030352951958E-6; dx=-2.600096371583415E-5 evalInputDelta=-2.1373125492510847E-12
Non-optimal measurement -1.0159030352951958E-6 < -697.4519999999997. Total: 2
mu ~= nu (1.1455761460583555E-11): th(3.5248496801795555E-12)=-1.0159030352951958E-6
Fitness changed from 4.092726157978177E-13 to -1.0159303656109842E-6
Iteration 2 complete. Error: -1.0159303656109842E-6 Total: 26.1727; Orientation: 0.7103; Line Search: 25.0362
Final threshold in iteration 2: -1.0159303656109842E-6 (> 0.0) after 40.102s (< 30.000s)

Returns

    -1.0159303656109842E-6

Training Converged

TrainingTester.java:432 executed in 0.15 seconds (0.000 gc):

    return TestUtil.compare(title + " vs Iteration", runs);
Logging
Plotting range=[1.0, 0.0], [2.0, 1.0]; valueStats=DoubleSummaryStatistics{count=0, sum=0.000000, min=Infinity, average=0.000000, max=-Infinity}
Only 0 points for GD
Only 0 points for CjGD
Only 0 points for LBFGS

Returns

Result

TrainingTester.java:435 executed in 0.00 seconds (0.000 gc):

    return TestUtil.compareTime(title + " vs Time", runs);
Logging
No Data

Results

TrainingTester.java:255 executed in 0.00 seconds (0.000 gc):

    return grid(inputLearning, modelLearning, completeLearning);

Returns

Result

TrainingTester.java:258 executed in 0.00 seconds (0.000 gc):

    return new ComponentResult(null == inputLearning ? null : inputLearning.value,
        null == modelLearning ? null : modelLearning.value, null == completeLearning ? null : completeLearning.value);

Returns

    {"input":{ "LBFGS": { "type": "NonConverged", "value": -697.4519999999997 }, "CjGD": { "type": "Converged", "value": -5.638867150992155E-12 }, "GD": { "type": "NonConverged", "value": -1.015905172607745E-6 } }, "model":null, "complete":null}

LayerTests.java:425 executed in 0.00 seconds (0.000 gc):

    throwException(exceptions.addRef());

Results

details | result
{"input":{ "LBFGS": { "type": "NonConverged", "value": -697.4519999999997 }, "CjGD": { "type": "Converged", "value": -5.638867150992155E-12 }, "GD": { "type": "NonConverged", "value": -1.015905172607745E-6 } }, "model":null, "complete":null} | OK
  {
    "result": "OK",
    "performance": {
      "execution_time": "128.966",
      "gc_time": "8.622"
    },
    "created_on": 1586734970259,
    "file_name": "trainingTest",
    "report": {
      "simpleName": "Double",
      "canonicalName": "com.simiacryptus.mindseye.layers.cudnn.SumReducerLayerTest.Double",
      "link": "https://github.com/SimiaCryptus/mindseye-cudnn/tree/59d5b3318556370acb2d83ee6ec123ce0fc6974f/src/test/java/com/simiacryptus/mindseye/layers/cudnn/SumReducerLayerTest.java",
      "javaDoc": ""
    },
    "training_analysis": {
      "input": {
        "LBFGS": {
          "type": "NonConverged",
          "value": -697.4519999999997
        },
        "CjGD": {
          "type": "Converged",
          "value": -5.638867150992155E-12
        },
        "GD": {
          "type": "NonConverged",
          "value": -1.015905172607745E-6
        }
      }
    },
    "archive": "s3://code.simiacrypt.us/tests/com/simiacryptus/mindseye/layers/cudnn/SumReducerLayer/Double/trainingTest/202004124250",
    "id": "30e52886-52db-4289-9403-2876c47eb5de",
    "report_type": "Components",
    "display_name": "Comparative Training",
    "target": {
      "simpleName": "SumReducerLayer",
      "canonicalName": "com.simiacryptus.mindseye.layers.cudnn.SumReducerLayer",
      "link": "https://github.com/SimiaCryptus/mindseye-cudnn/tree/59d5b3318556370acb2d83ee6ec123ce0fc6974f/src/main/java/com/simiacryptus/mindseye/layers/cudnn/SumReducerLayer.java",
      "javaDoc": ""
    }
  }