1. Test Modules
  2. Training Characteristics
    1. Input Learning
      1. Gradient Descent
      2. Conjugate Gradient Descent
      3. Limited-Memory BFGS
    2. Results
  3. Results

Subreport: Logs for com.simiacryptus.ref.lang.ReferenceCountingBase

Test Modules

Using Seed 6298239604033405952

Training Characteristics

Input Learning

In this test, we use a network to learn this target input, given its pre-evaluated output:

TrainingTester.java:332 executed in 0.03 seconds (0.000 gc):

    return RefArrays.stream(RefUtil.addRef(input_target)).flatMap(RefArrays::stream).map(x -> {
      try {
        return x.prettyPrint();
      } finally {
        x.freeRef();
      }
    }).reduce((a, b) -> a + "\n" + b).orElse("");

Returns

    [
    	[ [ -1.636, 1.24, 1.852 ], [ -1.1, -1.544, -1.368 ], [ -1.676, -0.844, -1.696 ], [ -1.808, 0.796, -1.624 ], [ 1.46, 0.136, 1.896 ], [ -1.904, -1.0, 0.448 ], [ -0.748, -0.988, 0.58 ], [ 1.74, -1.732, -1.24 ], ... ],
    	[ [ -1.556, -1.92, 0.08 ], [ 0.616, -0.064, 0.692 ], [ -0.1, 1.056, -1.724 ], [ -0.224, 0.116, 0.116 ], [ 0.208, 1.08, -1.572 ], [ -0.88, 0.004, -1.24 ], [ 0.62, -0.684, -1.568 ], [ 0.512, 1.24, -1.304 ], ... ],
    	[ [ -1.004, 1.696, -0.336 ], [ 1.02, -0.644, 0.424 ], [ 0.916, 0.612, -0.964 ], [ 1.832, 0.468, 0.492 ], [ 1.008, -0.484, 1.204 ], [ -1.708, -0.404, -1.992 ], [ 0.212, 0.924, 1.364 ], [ 0.36, -0.988, -0.152 ], ... ],
    	[ [ -0.632, 0.616, -0.3 ], [ 1.532, -1.452, -1.636 ], [ 1.62, -0.044, -1.256 ], [ 0.964, -0.348, 1.44 ], [ 1.5, -0.056, -0.852 ], [ -1.904, 0.86, 0.048 ], [ 1.952, 0.02, 0.256 ], [ -0.792, -0.712, 1.328 ], ... ],
    	[ [ 0.924, 1.18, 0.268 ], [ 1.196, 1.632, 1.088 ], [ -0.436, -0.348, 0.184 ], [ 0.736, -1.932, -1.136 ], [ -1.388, 1.448, 0.236 ], [ -1.98, -1.216, -0.532 ], [ -1.96, -0.192, 1.292 ], [ 1.38, 0.592, -1.316 ], ... ],
    	[ [ -1.776, -0.368, -1.544 ], [ -0.352, 1.2, 0.152 ], [ -1.968, 0.696, -0.604 ], [ -0.82, 1.356, -1.62 ], [ -1.244, 0.848, -0.984 ], [ -0.032, 1.984, -1.76 ], [ -1.148, 0.688, 0.784 ], [ -0.852, 0.28, 0.864 ], ... ],
    	[ [ -0.872, 1.944, -0.448 ], [ 0.016, 1.568, 1.028 ], [ -1.332, 1.232, 0.996 ], [ 0.344, 0.016, -0.128 ], [ -1.58, 0.768, -1.372 ], [ -0.632, -0.288, -1.716 ], [ -0.024, -0.532, -1.012 ], [ 1.2, 0.852, -1.252 ], ... ],
    	[ [ 0.096, -0.644, -1.792 ], [ -1.888, 0.492, -0.004 ], [ -1.648, 0.304, 0.024 ], [ 1.284, 0.724, -0.18 ], [ 0.12, 0.268, -0.632 ], [ -1.352, -0.5, 0.216 ], [ 0.264, 1.96, 0.372 ], [ -0.492, -0.724, 1.68 ], ... ],
    	...
    ]
    [
    	[ [ 1.36, 0.568, -0.22 ], [ -0.624, -1.78, 1.968 ], [ 0.464, -1.452, 0.872 ], [ -1.68, -1.4, -1.02 ], [ 1.076, 1.048, 0.524 ], [ 0.312, -0.496, 1.888 ], [ -0.136, 0.94, -1.296 ], [ -1.164, -1.656, 0.564 ], ... ],
    	[ [ 0.568, -1.2, -1.932 ], [ 0.128, -1.0, 0.94 ], [ -1.86, 0.032, 1.932 ], [ 0.204, -1.896, 1.172 ], [ 0.628, -1.912, -1.848 ], [ 1.488, 1.184, 0.796 ], [ 0.212, 0.576, -1.636 ], [ -1.288, -0.184, 0.96 ], ... ],
    	[ [ -0.184, 0.836, -1.036 ], [ -0.308, -1.684, 1.832 ], [ 0.572, 1.408, 1.268 ], [ -0.364, -0.856, -1.756 ], [ 0.244, -0.964, -0.316 ], [ 0.408, -0.028, 1.8 ], [ -1.96, 1.956, 1.988 ], [ -1.112, -1.924, -1.552 ], ... ],
    	[ [ -0.616, 1.324, -1.48 ], [ -1.72, 1.196, 1.94 ], [ -0.704, -0.808, -0.488 ], [ 0.164, 0.916, -0.088 ], [ -1.608, 1.268, 0.54 ], [ 1.76, -1.832, 1.9 ], [ -1.42, 1.356, 0.428 ], [ -0.516, 0.3, -1.26 ], ... ],
    	[ [ 1.876, 1.764, 0.656 ], [ -1.428, 1.548, 1.78 ], [ 0.652, -1.608, 1.788 ], [ 0.608, -1.344, -1.216 ], [ 1.692, 0.048, -0.888 ], [ 1.216, -0.452, -1.812 ], [ -1.396, -1.416, -1.072 ], [ 0.068, -1.108, -0.092 ], ... ],
    	[ [ 1.412, 1.968, -0.856 ], [ -1.568, 1.196, -0.796 ], [ -0.524, 0.832, 1.82 ], [ -0.992, 0.928, -1.124 ], [ 0.168, 0.276, 1.076 ], [ 1.888, 0.512, 0.792 ], [ -0.624, -1.496, -0.188 ], [ -1.184, 1.976, 0.068 ], ... ],
    	[ [ 1.764, -0.852, 1.124 ], [ -1.412, 1.548, 0.768 ], [ 0.28, 0.956, -0.088 ], [ -1.872, 0.956, 0.944 ], [ -1.296, 0.38, -1.808 ], [ 0.156, 0.64, 1.988 ], [ -1.612, -0.764, 1.836 ], [ -1.824, 1.184, -0.344 ], ... ],
    	[ [ -0.48, 1.84, -1.904 ], [ 1.728, -1.276, -1.844 ], [ -1.628, -1.372, 1.344 ], [ -0.536, -0.028, -1.824 ], [ -1.24, 0.544, 1.416 ], [ 0.012, 1.756, -0.768 ], [ 0.22, -0.64, -1.868 ], [ 0.196, 1.556, 1.364 ], ... ],
    	...
    ]
    [
    	[ [ -1.592, -1.432, -1.048 ], [ -0.188, -0.436, 0.216 ], [ -1.98, -1.12, 1.7 ], [ 1.508, 0.92, 1.548 ], [ -0.96, -1.724, -0.12 ], [ 0.56, -1.08, -1.656 ], [ 0.368, -1.236, 1.0 ], [ -1.184, -0.264, -1.716 ], ... ],
    	[ [ -1.412, -1.232, -1.004 ], [ -0.58, -0.448, -1.292 ], [ -0.86, -1.084, 1.504 ], [ -1.612, -0.1, -1.524 ], [ -0.88, 1.956, 1.828 ], [ -1.172, -0.28, -0.828 ], [ -1.328, -1.828, 1.856 ], [ 0.364, 1.988, 1.688 ], ... ],
    	[ [ -1.092, 1.984, -1.224 ], [ 1.02, 0.668, -1.62 ], [ 0.804, -1.752, 1.244 ], [ 0.596, -1.476, -0.132 ], [ 0.828, -1.696, 0.24 ], [ -0.572, -1.82, 0.376 ], [ 0.236, 0.18, 0.096 ], [ -1.784, 1.852, 0.776 ], ... ],
    	[ [ -1.156, 0.236, 1.18 ], [ 0.976, -1.08, 0.456 ], [ 0.784, 1.856, 1.956 ], [ 0.688, -1.732, 0.604 ], [ 1.06, -0.832, -0.868 ], [ 1.488, -0.244, 1.532 ], [ -1.012, -0.34, -0.712 ], [ -0.896, -0.1, 0.36 ], ... ],
    	[ [ 0.18, 1.728, -0.052 ], [ 0.28, 0.784, 1.392 ], [ 1.544, 1.98, 0.576 ], [ -1.488, -1.488, -0.912 ], [ -0.864, -1.816, -1.396 ], [ -0.04, 1.292, -0.024 ], [ 1.432, 0.804, 1.068 ], [ 1.604, 1.136, 1.188 ], ... ],
    	[ [ 0.064, 1.384, -0.508 ], [ -0.42, 0.116, -0.944 ], [ 1.836, -0.468, 0.252 ], [ -1.868, -0.356, 1.62 ], [ -0.568, 0.164, 0.38 ], [ -1.36, -0.748, 1.088 ], [ 0.268, 1.796, -0.24 ], [ -1.416, 1.324, 1.336 ], ... ],
    	[ [ 1.364, 0.104, 0.128 ], [ 0.276, -0.412, 1.964 ], [ 1.372, -0.108, -0.788 ], [ 0.676, 1.528, -1.568 ], [ 1.408, 0.212, 0.84 ], [ 1.908, -1.444, -0.412 ], [ 0.952, -0.012, -0.924 ], [ 0.344, 0.52, 0.052 ], ... ],
    	[ [ -1.196, -1.456, -0.06 ], [ -1.968, -0.116, -0.14 ], [ -1.14, -0.228, -0.548 ], [ -0.012, 1.472, -1.412 ], [ 0.896, -0.204, 1.372 ], [ 0.828, -0.04, 1.7 ], [ 0.32, -1.544, 1.18 ], [ 0.736, -1.0, -0.796 ], ... ],
    	...
    ]
    [
    	[ [ 1.312, -0.092, -0.508 ], [ 0.596, 0.232, 0.384 ], [ -0.632, -1.344, -1.816 ], [ -1.42, -0.212, 0.304 ], [ -1.056, 1.336, -0.872 ], [ 0.712, -1.488, 0.696 ], [ 1.12, 0.78, 0.74 ], [ -1.016, 0.276, -0.004 ], ... ],
    	[ [ -0.896, 0.4, -1.5 ], [ -0.9, 0.568, -1.172 ], [ 1.82, -1.196, -0.596 ], [ -0.952, 1.072, -1.956 ], [ 1.016, 1.652, -0.672 ], [ 0.26, 0.724, -1.304 ], [ -1.764, 0.976, -1.36 ], [ 0.156, 0.148, -1.744 ], ... ],
    	[ [ 0.228, -1.808, -1.056 ], [ 0.46, -1.56, 1.132 ], [ 1.724, -0.164, -1.372 ], [ 1.372, -1.624, 0.232 ], [ 1.124, 1.252, -1.716 ], [ 1.52, -0.024, 0.972 ], [ 0.62, 0.04, -1.048 ], [ -1.652, -0.24, -1.884 ], ... ],
    	[ [ 0.996, 1.208, -0.276 ], [ -0.112, 0.328, 1.948 ], [ 0.444, -0.164, 0.46 ], [ 0.348, -0.86, -0.108 ], [ -1.556, 1.3, 1.688 ], [ -0.632, 1.06, 1.748 ], [ -1.312, -1.78, -0.08 ], [ -1.232, 1.328, -1.356 ], ... ],
    	[ [ 1.216, -1.504, 1.264 ], [ 1.724, 1.536, 1.692 ], [ -0.928, 0.908, -0.492 ], [ 0.868, 1.82, 0.008 ], [ 0.968, -1.608, -1.564 ], [ -0.588, -1.228, 1.972 ], [ -0.28, 0.132, 1.344 ], [ -0.592, -0.412, 1.116 ], ... ],
    	[ [ -1.1, -0.276, 1.62 ], [ -1.692, -0.636, -0.324 ], [ -1.256, 0.468, -0.464 ], [ -1.356, 1.908, -0.504 ], [ 0.424, -0.544, 0.74 ], [ -0.404, 1.88, -0.396 ], [ 1.016, -1.644, -0.32 ], [ 1.372, -0.16, 0.756 ], ... ],
    	[ [ 0.792, -1.372, -0.396 ], [ -0.308, -0.728, -0.268 ], [ -1.244, 1.9, 1.952 ], [ -0.712, -1.364, 0.952 ], [ -0.588, -0.344, -1.688 ], [ -1.22, 0.692, -0.148 ], [ 1.904, 0.632, -1.328 ], [ 0.06, -0.768, 1.64 ], ... ],
    	[ [ -1.1, 0.976, 1.028 ], [ 0.792, -0.792, -1.972 ], [ -0.512, -1.616, -1.812 ], [ 0.384, -1.296, 1.496 ], [ -0.572, -0.832, 0.996 ], [ 1.788, 1.22, -0.516 ], [ -0.92, 0.756, 1.816 ], [ 0.82, -0.48, -1.9 ], ... ],
    	...
    ]
    [
    	[ [ -0.168, -1.384, -1.008 ], [ 0.204, 1.692, -0.008 ], [ -1.26, -1.268, 1.316 ], [ 0.704, -1.768, -1.876 ], [ 0.06, -0.452, -0.36 ], [ 0.616, 0.016, 1.812 ], [ -1.18, -0.716, -1.248 ], [ -0.84, 0.364, 0.244 ], ... ],
    	[ [ 1.3, 1.144, 1.808 ], [ 1.892, 0.408, -1.168 ], [ -0.412, -0.412, 1.572 ], [ -0.32, 0.152, -1.112 ], [ 1.12, -1.888, 1.18 ], [ -1.528, 0.104, -1.376 ], [ -1.992, 1.232, -1.184 ], [ 0.82, -1.744, -0.952 ], ... ],
    	[ [ -1.308, -0.204, 0.436 ], [ -0.108, -0.084, 0.124 ], [ 1.588, -0.996, 1.096 ], [ 1.796, -1.356, 0.932 ], [ 0.064, -1.48, -0.076 ], [ 1.0, 0.568, -1.016 ], [ -0.364, 1.0, 1.404 ], [ 0.608, 0.516, -1.86 ], ... ],
    	[ [ 1.816, 0.456, 1.044 ], [ 0.812, -0.848, -0.432 ], [ 0.412, -0.04, 1.668 ], [ -0.376, -1.18, -0.736 ], [ -0.292, -1.368, 1.2 ], [ -1.624, -0.488, 0.336 ], [ 0.904, 0.284, 1.516 ], [ -1.308, 0.632, 0.856 ], ... ],
    	[ [ 1.468, -0.972, 0.124 ], [ 1.34, -0.348, 0.532 ], [ -0.548, -1.636, -0.812 ], [ 0.8, -0.116, -0.308 ], [ -1.388, -0.164, -0.144 ], [ 1.124, 0.864, -0.132 ], [ -1.64, -0.736, 1.988 ], [ -1.784, 1.72, -1.916 ], ... ],
    	[ [ 1.652, 1.228, 1.58 ], [ -0.52, -0.848, 0.484 ], [ -1.388, 1.84, 0.744 ], [ 1.528, -1.98, -0.552 ], [ 0.692, -1.38, 1.812 ], [ -1.296, 0.492, 1.712 ], [ 0.764, 1.84, -0.444 ], [ 1.848, 1.588, -1.788 ], ... ],
    	[ [ -1.864, 1.916, -1.408 ], [ 0.96, -0.424, -1.044 ], [ 1.228, 0.42, -0.104 ], [ 0.16, -1.284, 1.224 ], [ -1.992, 1.056, 1.04 ], [ 0.332, 0.568, -1.024 ], [ -1.78, -0.008, 0.152 ], [ 1.592, -1.316, -1.28 ], ... ],
    	[ [ 0.604, 0.648, -1.868 ], [ -0.756, -0.348, -0.916 ], [ -1.568, 1.952, -1.564 ], [ 0.948, 0.78, 1.16 ], [ -0.296, 0.856, 0.356 ], [ -1.064, 1.332, -1.284 ], [ 1.848, -0.24, -0.888 ], [ -0.724, 0.78, -1.496 ], ... ],
    	...
    ]

Gradient Descent

First, we train using a basic gradient descent method, applying weak line search conditions.

TrainingTester.java:480 executed in 30.61 seconds (3.049 gc):

    IterativeTrainer iterativeTrainer = new IterativeTrainer(trainable.addRef());
    try {
      iterativeTrainer.setLineSearchFactory(label -> new ArmijoWolfeSearch());
      iterativeTrainer.setOrientation(new GradientDescent());
      iterativeTrainer.setMonitor(TrainingTester.getMonitor(history));
      iterativeTrainer.setTimeout(30, TimeUnit.SECONDS);
      iterativeTrainer.setMaxIterations(250);
      iterativeTrainer.setTerminateThreshold(0);
      return iterativeTrainer.run();
    } finally {
      iterativeTrainer.freeRef();
    }
Logging
Reset training subject: 7750070949371
Reset training subject: 7750871979643
Zero gradient: 5.155595945474966E-17
Constructing line search parameters: GD
th(0)=1.1518877629468413E-13;dx=-2.6580169552997904E-33
WOLFE (weak): th(2.154434690031884)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
WOLFE (weak): th(4.308869380063768)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
WOLFE (weak): th(12.926608140191302)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
WOLFE (weak): th(51.70643256076521)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
WOLFE (weak): th(258.53216280382605)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
WOLFE (weak): th(1551.1929768229563)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
New Minimum: 1.1518877629468413E-13 > 1.1518877629468406E-13
WOLFE (weak): th(10858.350837760694)=1.1518877629468406E-13; dx=-2.658016955299788E-33 evalInputDelta=7.573064690121713E-29
New Minimum: 1.1518877629468406E-13 > 1.1518877629468375E-13
WOLFE (weak): th(86866.80670208555)=1.1518877629468375E-13; dx=-2.6580169552997756E-33 evalInputDelta=3.7865323450608567E-28
New Minimum: 1.1518877629468375E-13 > 1.1518877629468206E-13
WOLFE (weak): th(781801.26031877)=1.1518877629468206E-13; dx=-2.6580169552996905E-33 evalInputDelta=2.069971015299935E-27
New Minimum: 1.1518877629468206E-13 > 1.1518877629466356E-13
WOLFE (weak): th(7818012.6031877)=1.1518877629466356E-13; dx=-2.6580169552987842E-33 evalInputDelta=2.0573492408163988E-26
New Minimum: 1.1518877629466356E-13 > 1.15188776294454E-13
WOLFE (weak): th(8.599813863506469E7)=1.15188776294454E-13; dx=-2.6580169552885943E-33 evalInputDelta=2.3014543593279887E-25
MAX ALPHA (1.0319776636207763E9): th(8.599813863506469E7)=1.15188776294454E-13
Fitness changed from 1.1518877629468413E-13 to 1.15188776294454E-13
Iteration 1 complete. Error: 1.15188776294454E-13 Total: 20.2425; Orientation: 0.6905; Line Search: 17.4967
Zero gradient: 5.1555959454532494E-17
th(0)=1.15188776294454E-13;dx=-2.6580169552773983E-33
New Minimum: 1.15188776294454E-13 > 1.1518877629418913E-13
WOLFE (weak): th(1.0E8)=1.1518877629418913E-13; dx=-2.65801695526446E-33 evalInputDelta=2.648553157625234E-25
MAX ALPHA (2.0E8): th(1.0E8)=1.1518877629418913E-13
Fitness changed from 1.15188776294454E-13 to 1.1518877629418913E-13
Iteration 2 complete. Error: 1.1518877629418913E-13 Total: 4.7270; Orientation: 0.7774; Line Search: 3.3611
Zero gradient: 5.155595945428152E-17
th(0)=1.1518877629418913E-13;dx=-2.6580169552515205E-33
New Minimum: 1.1518877629418913E-13 > 1.1518877629392322E-13
WOLFE (weak): th(1.0E8)=1.1518877629392322E-13; dx=-2.65801695523855E-33 evalInputDelta=2.6591554481914043E-25
MAX ALPHA (2.0E8): th(1.0E8)=1.1518877629392322E-13
Fitness changed from 1.1518877629418913E-13 to 1.1518877629392322E-13
Iteration 3 complete. Error: 1.1518877629392322E-13 Total: 5.6272; Orientation: 0.7777; Line Search: 4.2357
Final threshold in iteration 3: 1.1518877629392322E-13 (> 0.0) after 30.598s (< 30.000s)

Returns

    1.1518877629392322E-13

Training Converged

Conjugate Gradient Descent

Next, we use a conjugate gradient descent method, which converges the fastest for purely linear functions.

TrainingTester.java:452 executed in 33.91 seconds (1.076 gc):

    IterativeTrainer iterativeTrainer = new IterativeTrainer(trainable.addRef());
    try {
      iterativeTrainer.setLineSearchFactory(label -> new QuadraticSearch());
      iterativeTrainer.setOrientation(new GradientDescent());
      iterativeTrainer.setMonitor(TrainingTester.getMonitor(history));
      iterativeTrainer.setTimeout(30, TimeUnit.SECONDS);
      iterativeTrainer.setMaxIterations(250);
      iterativeTrainer.setTerminateThreshold(0);
      return iterativeTrainer.run();
    } finally {
      iterativeTrainer.freeRef();
    }
Logging
Reset training subject: 7780898408802
Reset training subject: 7781512106346
Zero gradient: 5.155595945474966E-17
Constructing line search parameters: GD
F(0.0) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}
F(1.0E-10) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(7.000000000000001E-10) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(4.900000000000001E-9) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(3.430000000000001E-8) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(2.4010000000000004E-7) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(1.6807000000000003E-6) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(1.1764900000000001E-5) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(8.235430000000001E-5) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(5.764801000000001E-4) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(0.004035360700000001) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(0.028247524900000005) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(0.19773267430000002) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(1.3841287201) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(9.688901040700001) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(67.8223072849) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
F(474.7561509943) = LineSearchPoint{point=PointSample{avg=1.1518877629468413E-13}, derivative=-2.6580169552997904E-33}, evalInputDelta = 0.0
New Minimum: 1.1518877629468413E-13 > 1.151887762946841E-13
F(3323.2930569601003) = LineSearchPoint{point=PointSample{avg=1.151887762946841E-13}, derivative=-2.65801695529979E-33}, evalInputDelta = -2.5243548967072378E-29
New Minimum: 1.151887762946841E-13 > 1.1518877629468403E-13
F(23263.0513987207) = LineSearchPoint{point=PointSample{avg=1.1518877629468403E-13}, derivative=-2.6580169552997866E-33}, evalInputDelta = -1.0097419586828951E-28
New Minimum: 1.1518877629468403E-13 > 1.1518877629468353E-13
F(162841.3597910449) = LineSearchPoint{point=PointSample{avg=1.1518877629468353E-13}, derivative=-2.658016955299765E-33}, evalInputDelta = -6.058451752097371E-28
New Minimum: 1.1518877629468353E-13 > 1.1518877629468148E-13
F(1139889.5185373144) = LineSearchPoint{point=PointSample{avg=1.1518877629468148E-13}, derivative=-2.6580169552996545E-33}, evalInputDelta = -2.6505726415425997E-27
New Minimum: 1.1518877629468148E-13 > 1.1518877629466316E-13
F(7979226.6297612) = LineSearchPoint{point=PointSample{avg=1.1518877629466316E-13}, derivative=-2.658016955298764E-33}, evalInputDelta = -2.0977389191637146E-26
New Minimum: 1.1518877629466316E-13 > 1.1518877629453537E-13
F(5.58545864083284E7) = LineSearchPoint{point=PointSample{avg=1.1518877629453537E-13}, derivative=-2.6580169552925403E-33}, evalInputDelta = -1.4876023406295752E-25
New Minimum: 1.1518877629453537E-13 > 1.151887762936439E-13
F(3.909821048582988E8) = LineSearchPoint{point=PointSample{avg=1.151887762936439E-13}, derivative=-2.6580169552490638E-33}, evalInputDelta = -1.0402361658351185E-24
New Minimum: 1.151887762936439E-13 > 1.151887762874084E-13
F(2.7368747340080914E9) = LineSearchPoint{point=PointSample{avg=1.151887762874084E-13}, derivative=-2.658016954944871E-33}, evalInputDelta = -7.275720926838568E-24
New Minimum: 1.151887762874084E-13 > 1.1518877624376035E-13
F(1.915812313805664E10) = LineSearchPoint{point=PointSample{avg=1.1518877624376035E-13}, derivative=-2.658016952815537E-33}, evalInputDelta = -5.092378608772614E-23
1.1518877624376035E-13 <= 1.1518877629468413E-13
F(1.0E10) = LineSearchPoint{point=PointSample{avg=1.151887762681027E-13}, derivative=-2.658016954003061E-33}, evalInputDelta = -2.6581431818778247E-23
1.151887762681027E-13 > 1.1518877624376035E-13
Fitness changed from 1.1518877629468413E-13 to 1.1518877624376035E-13
Iteration 1 complete. Error: 1.1518877624376035E-13 Total: 33.9127; Orientation: 0.7822; Line Search: 31.2758
Final threshold in iteration 1: 1.1518877624376035E-13 (> 0.0) after 33.913s (< 30.000s)

Returns

    1.1518877624376035E-13

Training Converged

Limited-Memory BFGS

Next, we apply the same optimization using L-BFGS, which is nearly ideal for purely second-order or quadratic functions.

TrainingTester.java:509 executed in 114.06 seconds (3.464 gc):

    IterativeTrainer iterativeTrainer = new IterativeTrainer(trainable.addRef());
    try {
      iterativeTrainer.setLineSearchFactory(label -> new ArmijoWolfeSearch());
      iterativeTrainer.setOrientation(new LBFGS());
      iterativeTrainer.setMonitor(TrainingTester.getMonitor(history));
      iterativeTrainer.setTimeout(30, TimeUnit.SECONDS);
      iterativeTrainer.setIterationsPerSample(100);
      iterativeTrainer.setMaxIterations(250);
      iterativeTrainer.setTerminateThreshold(0);
      return iterativeTrainer.run();
    } finally {
      iterativeTrainer.freeRef();
    }
Logging
Reset training subject: 7815053229691
Reset training subject: 7815673892110
Adding measurement 24a21193 to history. Total: 0
LBFGS Accumulation History: 1 points
Constructing line search parameters: GD
Non-optimal measurement 1.1518877629468413E-13 < 1.1518877629468413E-13. Total: 1
th(0)=1.1518877629468413E-13;dx=-2.6580169552997904E-33
Non-optimal measurement 1.1518877629468413E-13 < 1.1518877629468413E-13. Total: 1
WOLFE (weak): th(2.154434690031884)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
Non-optimal measurement 1.1518877629468413E-13 < 1.1518877629468413E-13. Total: 1
WOLFE (weak): th(4.308869380063768)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
Non-optimal measurement 1.1518877629468413E-13 < 1.1518877629468413E-13. Total: 1
WOLFE (weak): th(12.926608140191302)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
Non-optimal measurement 1.1518877629468413E-13 < 1.1518877629468413E-13. Total: 1
WOLFE (weak): th(51.70643256076521)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
Non-optimal measurement 1.1518877629468413E-13 < 1.1518877629468413E-13. Total: 1
WOLFE (weak): th(258.53216280382605)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
Non-optimal measurement 1.1518877629468413E-13 < 1.1518877629468413E-13. Total: 1
WOLFE (weak): th(1551.1929768229563)=1.1518877629468413E-13; dx=-2.6580169552997904E-33 evalInputDelta=0.0
Adding measurement 5fac1906 to history. Total: 1
New Minimum: 1.1518877629468413E-13 > 1.1518877629468406E-13
WOLFE (weak): th(10858.350837760694)=1.1518877629468406E-13; dx=-2.658016955299788E-33 evalInputDelta=7.573064690121713E-29
Adding measurement 58aee238 to history. Total: 2
New Minimum: 1.1518877629468406E-13 > 1.1518877629468375E-13
WOLFE (weak): th(86866.80670208555)=1.1518877629468375E-13; dx=-2.6580169552997756E-33 evalInputDelta=3.7865323450608567E-28
Adding measurement 6883afba to history. Total: 3
New Minimum: 1.1518877629468375E-13 > 1.1518877629468206E-13
WOLFE (weak): th(781801.26031877)=1.1518877629468206E-13; dx=-2.6580169552996905E-33 evalInputDelta=2.069971015299935E-27
Adding measurement 63b65260 to history. Total: 4
New Minimum: 1.1518877629468206E-13 > 1.1518877629466356E-13
WOLFE (weak): th(7818012.6031877)=1.1518877629466356E-13; dx=-2.6580169552987842E-33 evalInputDelta=2.0573492408163988E-26
Adding measurement 22c57ff5 to history. Total: 5
New Minimum: 1.1518877629466356E-13 > 1.15188776294454E-13
WOLFE (weak): th(8.599813863506469E7)=1.15188776294454E-13; dx=-2.6580169552885943E-33 evalInputDelta=2.3014543593279887E-25
Non-optimal measurement 1.15188776294454E-13 < 1.15188776294454E-13. Total: 6
MAX ALPHA (1.0319776636207763E9): th(8.599813863506469E7)=1.15188776294454E-13
Fitness changed from 1.1518877629468413E-13 to 1.15188776294454E-13
Iteration 1 complete. Error: 1.15188776294454E-13 Total: 22.6808; Orientation: 0.9437; Line Search: 19.8796
Non-optimal measurement 1.15188776294454E-13 < 1.15188776294454E-13. Total: 6
Rejected: LBFGS Orientation magnitude: 8.951e+02, gradient 5.156e-17, dot -0.932; [fbe67153-afe3-44f8-8c13-5e31bae22b9e = 1.000/1.000e+00, 1adc6cee-06ba-4cd6-9d01-158a7b55715b = 1.000/1.000e+00, a7523594-7167-4e64-b084-3f6215a737d1 = 1.000/1.000e+00, e6ebf52e-f929-4625-ba7b-363672ded35b = 1.000/1.000e+00, ee3cbeed-8810-477c-bbb8-af911dfc92eb = 1.000/1.000e+00]
Orientation rejected. Popping history element from 1.15188776294454E-13, 1.1518877629466356E-13, 1.1518877629468206E-13, 1.1518877629468375E-13, 1.1518877629468406E-13, 1.1518877629468413E-13
Rejected: LBFGS Orientation magnitude: 1.209e+03, gradient 5.156e-17, dot -0.942; [fbe67153-afe3-44f8-8c13-5e31bae22b9e = 1.000/1.000e+00, e6ebf52e-f929-4625-ba7b-363672ded35b = 1.000/1.000e+00, a7523594-7167-4e64-b084-3f6215a737d1 = 1.000/1.000e+00, ee3cbeed-8810-477c-bbb8-af911dfc92eb = 1.000/1.000e+00, 1adc6cee-06ba-4cd6-9d01-158a7b55715b = 1.000/1.000e+00]
Orientation rejected. Popping history element from 1.15188776294454E-13, 1.1518877629466356E-13, 1.1518877629468206E-13, 1.1518877629468375E-13, 1.1518877629468406E-13
Rejected: LBFGS Orientation magnitude: 1.699e+03, gradient 5.156e-17, dot -0.946; [ee3cbeed-8810-477c-bbb8-af911dfc92eb = 1.000/1.000e+00, a7523594-7167-4e64-b084-3f6215a737d1 = 1.000/1.000e+00, e6ebf52e-f929-4625-ba7b-363672ded35b = 1.000/1.000e+00, 1adc6cee-06ba-4cd6-9d01-158a7b55715b = 1.000/1.000e+00, fbe67153-afe3-44f8-8c13-5e31bae22b9e = 1.000/1.000e+00]
Orientation rejected. Popping history element from 1.15188776294454E-13, 1.1518877629466356E-13, 1.1518877629468206E-13, 1.1518877629468375E-13
LBFGS Accumulation History: 3 points
Removed measurement 22c57ff5 to history. Total: 5
Removed measurement 63b65260 to history. Total: 4
Removed measurement 6883afba to history. Total: 3
Adding measurement f093f3a to history. Total: 3
th(0)=1.15188776294454E-13;dx=-2.6580169552773983E-33
Adding measurement 5612a418 to history. Total: 4
New Minimum: 1.15188776294454E-13 > 1.1518877629418913E-13
WOLFE (weak): th(1.0E8)=1.1518877629418913E-13; dx=-2.65801695526446E-33 evalInputDelta=2.648553157625234E-25
Non-optimal measurement 1.1518877629418913E-13 < 1.1518877629418913E-13. Total: 5
MAX ALPHA (2.0E8): th(1.0E8)=1.1518877629418913E-13
Fitness changed from 1.15188776294454E-13 to 1.1518877629418913E-13
Iteration 2 complete. Error: 1.1518877629418913E-13 Total: 91.3764; Orientation: 86.1945; Line Search: 4.5659
Final threshold in iteration 2: 1.1518877629418913E-13 (> 0.0) after 114.057s (< 30.000s)

Returns

    1.1518877629418913E-13

Training Converged

TrainingTester.java:432 executed in 0.16 seconds (0.000 gc):

    return TestUtil.compare(title + " vs Iteration", runs);
Logging
Plotting range=[1.0, -12.938589835607857], [3.0, -12.938589835416728]; valueStats=DoubleSummaryStatistics{count=6, sum=0.000000, min=0.000000, average=0.000000, max=0.000000}
Plotting 3 points for GD
Only 1 points for CjGD
Plotting 2 points for LBFGS

Returns

Result

TrainingTester.java:435 executed in 0.01 seconds (0.000 gc):

    return TestUtil.compareTime(title + " vs Time", runs);
Logging
Plotting range=[0.0, -12.938589835607857], [91.376, -12.938589835416728]; valueStats=DoubleSummaryStatistics{count=6, sum=0.000000, min=0.000000, average=0.000000, max=0.000000}
Plotting 3 points for GD
Only 1 points for CjGD
Plotting 2 points for LBFGS

Returns

Result

Results

TrainingTester.java:255 executed in 0.00 seconds (0.000 gc):

    return grid(inputLearning, modelLearning, completeLearning);

Returns

Result

TrainingTester.java:258 executed in 0.00 seconds (0.000 gc):

    return new ComponentResult(null == inputLearning ? null : inputLearning.value,
        null == modelLearning ? null : modelLearning.value, null == completeLearning ? null : completeLearning.value);

Returns

    {"input":{ "LBFGS": { "type": "Converged", "value": 1.1518877629418913E-13 }, "CjGD": { "type": "Converged", "value": 1.1518877624376035E-13 }, "GD": { "type": "Converged", "value": 1.1518877629392322E-13 } }, "model":null, "complete":null}

LayerTests.java:425 executed in 0.00 seconds (0.000 gc):

    throwException(exceptions.addRef());

Results

details / result
{"input":{ "LBFGS": { "type": "Converged", "value": 1.1518877629418913E-13 }, "CjGD": { "type": "Converged", "value": 1.1518877624376035E-13 }, "GD": { "type": "Converged", "value": 1.1518877629392322E-13 } }, "model":null, "complete":null}OK
  {
    "result": "OK",
    "performance": {
      "execution_time": "183.331",
      "gc_time": "7.858"
    },
    "created_on": 1586742378287,
    "file_name": "trainingTest",
    "report": {
      "simpleName": "Basic",
      "canonicalName": "com.simiacryptus.mindseye.layers.cudnn.SoftmaxLayerTest.Basic",
      "link": "https://github.com/SimiaCryptus/mindseye-cudnn/tree/59d5b3318556370acb2d83ee6ec123ce0fc6974f/src/test/java/com/simiacryptus/mindseye/layers/cudnn/SoftmaxLayerTest.java",
      "javaDoc": ""
    },
    "training_analysis": {
      "input": {
        "LBFGS": {
          "type": "Converged",
          "value": 1.1518877629418913E-13
        },
        "CjGD": {
          "type": "Converged",
          "value": 1.1518877624376035E-13
        },
        "GD": {
          "type": "Converged",
          "value": 1.1518877629392322E-13
        }
      }
    },
    "archive": "s3://code.simiacrypt.us/tests/com/simiacryptus/mindseye/layers/cudnn/SoftmaxActivationLayer/Basic/trainingTest/202004134618",
    "id": "e60d10e7-584b-4383-94c5-e347659179e5",
    "report_type": "Components",
    "display_name": "Comparative Training",
    "target": {
      "simpleName": "SoftmaxActivationLayer",
      "canonicalName": "com.simiacryptus.mindseye.layers.cudnn.SoftmaxActivationLayer",
      "link": "https://github.com/SimiaCryptus/mindseye-cudnn/tree/59d5b3318556370acb2d83ee6ec123ce0fc6974f/src/main/java/com/simiacryptus/mindseye/layers/cudnn/SoftmaxActivationLayer.java",
      "javaDoc": ""
    }
  }