001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math3.linear;
019    
020    import java.io.Serializable;
021    import java.util.Arrays;
022    
023    import org.apache.commons.math3.exception.DimensionMismatchException;
024    import org.apache.commons.math3.exception.NoDataException;
025    import org.apache.commons.math3.exception.NotStrictlyPositiveException;
026    import org.apache.commons.math3.exception.NullArgumentException;
027    import org.apache.commons.math3.exception.NumberIsTooSmallException;
028    import org.apache.commons.math3.exception.OutOfRangeException;
029    import org.apache.commons.math3.exception.util.LocalizedFormats;
030    import org.apache.commons.math3.util.FastMath;
031    import org.apache.commons.math3.util.MathUtils;
032    
033    /**
034     * Cache-friendly implementation of RealMatrix using a flat arrays to store
035     * square blocks of the matrix.
036     * <p>
037     * This implementation is specially designed to be cache-friendly. Square blocks are
038     * stored as small arrays and allow efficient traversal of data both in row major direction
039     * and columns major direction, one block at a time. This greatly increases performances
040     * for algorithms that use crossed directions loops like multiplication or transposition.
041     * </p>
042     * <p>
043     * The size of square blocks is a static parameter. It may be tuned according to the cache
044     * size of the target computer processor. As a rule of thumbs, it should be the largest
045     * value that allows three blocks to be simultaneously cached (this is necessary for example
046     * for matrix multiplication). The default value is to use 52x52 blocks which is well suited
047     * for processors with 64k L1 cache (one block holds 2704 values or 21632 bytes). This value
048     * could be lowered to 36x36 for processors with 32k L1 cache.
049     * </p>
050     * <p>
051     * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
052     * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
053     * blocks are flattened in row major order in single dimension arrays which are therefore
054     * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
055     * organized in row major order.
056     * </p>
057     * <p>
058     * As an example, for a block size of 52x52, a 100x60 matrix would be stored in 4 blocks.
059     * Block 0 would be a double[2704] array holding the upper left 52x52 square, block 1 would be
060     * a double[416] array holding the upper right 52x8 rectangle, block 2 would be a double[2496]
061     * array holding the lower left 48x52 rectangle and block 3 would be a double[384] array
062     * holding the lower right 48x8 rectangle.
063     * </p>
064     * <p>
065     * The layout complexity overhead versus simple mapping of matrices to java
066     * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
067     * to up to 3-fold improvements for matrices of moderate to large size.
068     * </p>
069     * @version $Id: BlockRealMatrix.java 1416643 2012-12-03 19:37:14Z tn $
070     * @since 2.0
071     */
072    public class BlockRealMatrix extends AbstractRealMatrix implements Serializable {
073        /** Block size. */
074        public static final int BLOCK_SIZE = 52;
075        /** Serializable version identifier */
076        private static final long serialVersionUID = 4991895511313664478L;
077        /** Blocks of matrix entries. */
078        private final double blocks[][];
079        /** Number of rows of the matrix. */
080        private final int rows;
081        /** Number of columns of the matrix. */
082        private final int columns;
083        /** Number of block rows of the matrix. */
084        private final int blockRows;
085        /** Number of block columns of the matrix. */
086        private final int blockColumns;
087    
088        /**
089         * Create a new matrix with the supplied row and column dimensions.
090         *
091         * @param rows  the number of rows in the new matrix
092         * @param columns  the number of columns in the new matrix
093         * @throws NotStrictlyPositiveException if row or column dimension is not
094         * positive.
095         */
096        public BlockRealMatrix(final int rows, final int columns)
097            throws NotStrictlyPositiveException {
098            super(rows, columns);
099            this.rows = rows;
100            this.columns = columns;
101    
102            // number of blocks
103            blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
104            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
105    
106            // allocate storage blocks, taking care of smaller ones at right and bottom
107            blocks = createBlocksLayout(rows, columns);
108        }
109    
110        /**
111         * Create a new dense matrix copying entries from raw layout data.
112         * <p>The input array <em>must</em> already be in raw layout.</p>
113         * <p>Calling this constructor is equivalent to call:
114         * <pre>matrix = new BlockRealMatrix(rawData.length, rawData[0].length,
115         *                                   toBlocksLayout(rawData), false);</pre>
116         * </p>
117         *
118         * @param rawData data for new matrix, in raw layout
119         * @throws DimensionMismatchException if the shape of {@code blockData} is
120         * inconsistent with block layout.
121         * @throws NotStrictlyPositiveException if row or column dimension is not
122         * positive.
123         * @see #BlockRealMatrix(int, int, double[][], boolean)
124         */
125        public BlockRealMatrix(final double[][] rawData)
126            throws DimensionMismatchException, NotStrictlyPositiveException {
127            this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
128        }
129    
130        /**
131         * Create a new dense matrix copying entries from block layout data.
132         * <p>The input array <em>must</em> already be in blocks layout.</p>
133         *
134         * @param rows Number of rows in the new matrix.
135         * @param columns Number of columns in the new matrix.
136         * @param blockData data for new matrix
137         * @param copyArray Whether the input array will be copied or referenced.
138         * @throws DimensionMismatchException if the shape of {@code blockData} is
139         * inconsistent with block layout.
140         * @throws NotStrictlyPositiveException if row or column dimension is not
141         * positive.
142         * @see #createBlocksLayout(int, int)
143         * @see #toBlocksLayout(double[][])
144         * @see #BlockRealMatrix(double[][])
145         */
146        public BlockRealMatrix(final int rows, final int columns,
147                               final double[][] blockData, final boolean copyArray)
148            throws DimensionMismatchException, NotStrictlyPositiveException {
149            super(rows, columns);
150            this.rows = rows;
151            this.columns = columns;
152    
153            // number of blocks
154            blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
155            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
156    
157            if (copyArray) {
158                // allocate storage blocks, taking care of smaller ones at right and bottom
159                blocks = new double[blockRows * blockColumns][];
160            } else {
161                // reference existing array
162                blocks = blockData;
163            }
164    
165            int index = 0;
166            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
167                final int iHeight = blockHeight(iBlock);
168                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
169                    if (blockData[index].length != iHeight * blockWidth(jBlock)) {
170                        throw new DimensionMismatchException(blockData[index].length,
171                                                             iHeight * blockWidth(jBlock));
172                    }
173                    if (copyArray) {
174                        blocks[index] = blockData[index].clone();
175                    }
176                }
177            }
178        }
179    
180        /**
181         * Convert a data array from raw layout to blocks layout.
182         * <p>
183         * Raw layout is the straightforward layout where element at row i and
184         * column j is in array element <code>rawData[i][j]</code>. Blocks layout
185         * is the layout used in {@link BlockRealMatrix} instances, where the matrix
186         * is split in square blocks (except at right and bottom side where blocks may
187         * be rectangular to fit matrix size) and each block is stored in a flattened
188         * one-dimensional array.
189         * </p>
190         * <p>
191         * This method creates an array in blocks layout from an input array in raw layout.
192         * It can be used to provide the array argument of the {@link
193         * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
194         * </p>
195         * @param rawData Data array in raw layout.
196         * @return a new data array containing the same entries but in blocks layout.
197         * @throws DimensionMismatchException if {@code rawData} is not rectangular.
198         * @see #createBlocksLayout(int, int)
199         * @see #BlockRealMatrix(int, int, double[][], boolean)
200         */
201        public static double[][] toBlocksLayout(final double[][] rawData)
202            throws DimensionMismatchException {
203            final int rows = rawData.length;
204            final int columns = rawData[0].length;
205            final int blockRows = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
206            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
207    
208            // safety checks
209            for (int i = 0; i < rawData.length; ++i) {
210                final int length = rawData[i].length;
211                if (length != columns) {
212                    throw new DimensionMismatchException(columns, length);
213                }
214            }
215    
216            // convert array
217            final double[][] blocks = new double[blockRows * blockColumns][];
218            int blockIndex = 0;
219            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
220                final int pStart = iBlock * BLOCK_SIZE;
221                final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
222                final int iHeight = pEnd - pStart;
223                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
224                    final int qStart = jBlock * BLOCK_SIZE;
225                    final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
226                    final int jWidth = qEnd - qStart;
227    
228                    // allocate new block
229                    final double[] block = new double[iHeight * jWidth];
230                    blocks[blockIndex] = block;
231    
232                    // copy data
233                    int index = 0;
234                    for (int p = pStart; p < pEnd; ++p) {
235                        System.arraycopy(rawData[p], qStart, block, index, jWidth);
236                        index += jWidth;
237                    }
238                    ++blockIndex;
239                }
240            }
241    
242            return blocks;
243        }
244    
245        /**
246         * Create a data array in blocks layout.
247         * <p>
248         * This method can be used to create the array argument of the {@link
249         * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
250         * </p>
251         * @param rows Number of rows in the new matrix.
252         * @param columns Number of columns in the new matrix.
253         * @return a new data array in blocks layout.
254         * @see #toBlocksLayout(double[][])
255         * @see #BlockRealMatrix(int, int, double[][], boolean)
256         */
257        public static double[][] createBlocksLayout(final int rows, final int columns) {
258            final int blockRows = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
259            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
260    
261            final double[][] blocks = new double[blockRows * blockColumns][];
262            int blockIndex = 0;
263            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
264                final int pStart = iBlock * BLOCK_SIZE;
265                final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
266                final int iHeight = pEnd - pStart;
267                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
268                    final int qStart = jBlock * BLOCK_SIZE;
269                    final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
270                    final int jWidth = qEnd - qStart;
271                    blocks[blockIndex] = new double[iHeight * jWidth];
272                    ++blockIndex;
273                }
274            }
275    
276            return blocks;
277        }
278    
279        /** {@inheritDoc} */
280        @Override
281        public BlockRealMatrix createMatrix(final int rowDimension,
282                                            final int columnDimension)
283            throws NotStrictlyPositiveException {
284            return new BlockRealMatrix(rowDimension, columnDimension);
285        }
286    
287        /** {@inheritDoc} */
288        @Override
289        public BlockRealMatrix copy() {
290            // create an empty matrix
291            BlockRealMatrix copied = new BlockRealMatrix(rows, columns);
292    
293            // copy the blocks
294            for (int i = 0; i < blocks.length; ++i) {
295                System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
296            }
297    
298            return copied;
299        }
300    
301        /** {@inheritDoc} */
302        @Override
303        public BlockRealMatrix add(final RealMatrix m)
304            throws MatrixDimensionMismatchException {
305            try {
306                return add((BlockRealMatrix) m);
307            } catch (ClassCastException cce) {
308                // safety check
309                MatrixUtils.checkAdditionCompatible(this, m);
310    
311                final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
312    
313                // perform addition block-wise, to ensure good cache behavior
314                int blockIndex = 0;
315                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
316                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
317    
318                        // perform addition on the current block
319                        final double[] outBlock = out.blocks[blockIndex];
320                        final double[] tBlock   = blocks[blockIndex];
321                        final int pStart = iBlock * BLOCK_SIZE;
322                        final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
323                        final int qStart = jBlock * BLOCK_SIZE;
324                        final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
325                        int k = 0;
326                        for (int p = pStart; p < pEnd; ++p) {
327                            for (int q = qStart; q < qEnd; ++q) {
328                                outBlock[k] = tBlock[k] + m.getEntry(p, q);
329                                ++k;
330                            }
331                        }
332                        // go to next block
333                        ++blockIndex;
334                    }
335                }
336    
337                return out;
338            }
339        }
340    
341        /**
342         * Compute the sum of this matrix and {@code m}.
343         *
344         * @param m Matrix to be added.
345         * @return {@code this} + m.
346         * @throws MatrixDimensionMismatchException if {@code m} is not the same
347         * size as this matrix.
348         */
349        public BlockRealMatrix add(final BlockRealMatrix m)
350            throws MatrixDimensionMismatchException {
351            // safety check
352            MatrixUtils.checkAdditionCompatible(this, m);
353    
354            final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
355    
356            // perform addition block-wise, to ensure good cache behavior
357            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
358                final double[] outBlock = out.blocks[blockIndex];
359                final double[] tBlock = blocks[blockIndex];
360                final double[] mBlock = m.blocks[blockIndex];
361                for (int k = 0; k < outBlock.length; ++k) {
362                    outBlock[k] = tBlock[k] + mBlock[k];
363                }
364            }
365    
366            return out;
367        }
368    
369        /** {@inheritDoc} */
370        @Override
371        public BlockRealMatrix subtract(final RealMatrix m)
372            throws MatrixDimensionMismatchException {
373            try {
374                return subtract((BlockRealMatrix) m);
375            } catch (ClassCastException cce) {
376                // safety check
377                MatrixUtils.checkSubtractionCompatible(this, m);
378    
379                final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
380    
381                // perform subtraction block-wise, to ensure good cache behavior
382                int blockIndex = 0;
383                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
384                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
385    
386                        // perform subtraction on the current block
387                        final double[] outBlock = out.blocks[blockIndex];
388                        final double[] tBlock = blocks[blockIndex];
389                        final int pStart = iBlock * BLOCK_SIZE;
390                        final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
391                        final int qStart = jBlock * BLOCK_SIZE;
392                        final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
393                        int k = 0;
394                        for (int p = pStart; p < pEnd; ++p) {
395                            for (int q = qStart; q < qEnd; ++q) {
396                                outBlock[k] = tBlock[k] - m.getEntry(p, q);
397                                ++k;
398                            }
399                        }
400                        // go to next block
401                        ++blockIndex;
402                    }
403                }
404    
405                return out;
406            }
407        }
408    
409        /**
410         * Subtract {@code m} from this matrix.
411         *
412         * @param m Matrix to be subtracted.
413         * @return {@code this} - m.
414         * @throws MatrixDimensionMismatchException if {@code m} is not the
415         * same size as this matrix.
416         */
417        public BlockRealMatrix subtract(final BlockRealMatrix m)
418            throws MatrixDimensionMismatchException {
419            // safety check
420            MatrixUtils.checkSubtractionCompatible(this, m);
421    
422            final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
423    
424            // perform subtraction block-wise, to ensure good cache behavior
425            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
426                final double[] outBlock = out.blocks[blockIndex];
427                final double[] tBlock = blocks[blockIndex];
428                final double[] mBlock = m.blocks[blockIndex];
429                for (int k = 0; k < outBlock.length; ++k) {
430                    outBlock[k] = tBlock[k] - mBlock[k];
431                }
432            }
433    
434            return out;
435        }
436    
437        /** {@inheritDoc} */
438        @Override
439        public BlockRealMatrix scalarAdd(final double d) {
440    
441            final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
442    
443            // perform subtraction block-wise, to ensure good cache behavior
444            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
445                final double[] outBlock = out.blocks[blockIndex];
446                final double[] tBlock = blocks[blockIndex];
447                for (int k = 0; k < outBlock.length; ++k) {
448                    outBlock[k] = tBlock[k] + d;
449                }
450            }
451    
452            return out;
453        }
454    
455        /** {@inheritDoc} */
456        @Override
457        public RealMatrix scalarMultiply(final double d) {
458            final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
459    
460            // perform subtraction block-wise, to ensure good cache behavior
461            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
462                final double[] outBlock = out.blocks[blockIndex];
463                final double[] tBlock = blocks[blockIndex];
464                for (int k = 0; k < outBlock.length; ++k) {
465                    outBlock[k] = tBlock[k] * d;
466                }
467            }
468    
469            return out;
470        }
471    
472        /** {@inheritDoc} */
473        @Override
474        public BlockRealMatrix multiply(final RealMatrix m)
475            throws DimensionMismatchException {
476            try {
477                return multiply((BlockRealMatrix) m);
478            } catch (ClassCastException cce) {
479                // safety check
480                MatrixUtils.checkMultiplicationCompatible(this, m);
481    
482                final BlockRealMatrix out = new BlockRealMatrix(rows, m.getColumnDimension());
483    
484                // perform multiplication block-wise, to ensure good cache behavior
485                int blockIndex = 0;
486                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
487                    final int pStart = iBlock * BLOCK_SIZE;
488                    final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
489    
490                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
491                        final int qStart = jBlock * BLOCK_SIZE;
492                        final int qEnd = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
493    
494                        // select current block
495                        final double[] outBlock = out.blocks[blockIndex];
496    
497                        // perform multiplication on current block
498                        for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
499                            final int kWidth = blockWidth(kBlock);
500                            final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
501                            final int rStart = kBlock * BLOCK_SIZE;
502                            int k = 0;
503                            for (int p = pStart; p < pEnd; ++p) {
504                                final int lStart = (p - pStart) * kWidth;
505                                final int lEnd = lStart + kWidth;
506                                for (int q = qStart; q < qEnd; ++q) {
507                                    double sum = 0;
508                                    int r = rStart;
509                                    for (int l = lStart; l < lEnd; ++l) {
510                                        sum += tBlock[l] * m.getEntry(r, q);
511                                        ++r;
512                                    }
513                                    outBlock[k] += sum;
514                                    ++k;
515                                }
516                            }
517                        }
518                        // go to next block
519                        ++blockIndex;
520                    }
521                }
522    
523                return out;
524            }
525        }
526    
527        /**
528         * Returns the result of postmultiplying this by {@code m}.
529         *
530         * @param m Matrix to postmultiply by.
531         * @return {@code this} * m.
532         * @throws DimensionMismatchException if the matrices are not compatible.
533         */
534        public BlockRealMatrix multiply(BlockRealMatrix m)
535            throws DimensionMismatchException {
536            // safety check
537            MatrixUtils.checkMultiplicationCompatible(this, m);
538    
539            final BlockRealMatrix out = new BlockRealMatrix(rows, m.columns);
540    
541            // perform multiplication block-wise, to ensure good cache behavior
542            int blockIndex = 0;
543            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
544    
545                final int pStart = iBlock * BLOCK_SIZE;
546                final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
547    
548                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
549                    final int jWidth = out.blockWidth(jBlock);
550                    final int jWidth2 = jWidth  + jWidth;
551                    final int jWidth3 = jWidth2 + jWidth;
552                    final int jWidth4 = jWidth3 + jWidth;
553    
554                    // select current block
555                    final double[] outBlock = out.blocks[blockIndex];
556    
557                    // perform multiplication on current block
558                    for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
559                        final int kWidth = blockWidth(kBlock);
560                        final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
561                        final double[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
562                        int k = 0;
563                        for (int p = pStart; p < pEnd; ++p) {
564                            final int lStart = (p - pStart) * kWidth;
565                            final int lEnd = lStart + kWidth;
566                            for (int nStart = 0; nStart < jWidth; ++nStart) {
567                                double sum = 0;
568                                int l = lStart;
569                                int n = nStart;
570                                while (l < lEnd - 3) {
571                                    sum += tBlock[l] * mBlock[n] +
572                                           tBlock[l + 1] * mBlock[n + jWidth] +
573                                           tBlock[l + 2] * mBlock[n + jWidth2] +
574                                           tBlock[l + 3] * mBlock[n + jWidth3];
575                                    l += 4;
576                                    n += jWidth4;
577                                }
578                                while (l < lEnd) {
579                                    sum += tBlock[l++] * mBlock[n];
580                                    n += jWidth;
581                                }
582                                outBlock[k] += sum;
583                                ++k;
584                            }
585                        }
586                    }
587                    // go to next block
588                    ++blockIndex;
589                }
590            }
591    
592            return out;
593        }
594    
595        /** {@inheritDoc} */
596        @Override
597        public double[][] getData() {
598            final double[][] data = new double[getRowDimension()][getColumnDimension()];
599            final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
600    
601            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
602                final int pStart = iBlock * BLOCK_SIZE;
603                final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
604                int regularPos = 0;
605                int lastPos = 0;
606                for (int p = pStart; p < pEnd; ++p) {
607                    final double[] dataP = data[p];
608                    int blockIndex = iBlock * blockColumns;
609                    int dataPos = 0;
610                    for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
611                        System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
612                        dataPos += BLOCK_SIZE;
613                    }
614                    System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
615                    regularPos += BLOCK_SIZE;
616                    lastPos    += lastColumns;
617                }
618            }
619    
620            return data;
621        }
622    
623        /** {@inheritDoc} */
624        @Override
625        public double getNorm() {
626            final double[] colSums = new double[BLOCK_SIZE];
627            double maxColSum = 0;
628            for (int jBlock = 0; jBlock < blockColumns; jBlock++) {
629                final int jWidth = blockWidth(jBlock);
630                Arrays.fill(colSums, 0, jWidth, 0.0);
631                for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
632                    final int iHeight = blockHeight(iBlock);
633                    final double[] block = blocks[iBlock * blockColumns + jBlock];
634                    for (int j = 0; j < jWidth; ++j) {
635                        double sum = 0;
636                        for (int i = 0; i < iHeight; ++i) {
637                            sum += FastMath.abs(block[i * jWidth + j]);
638                        }
639                        colSums[j] += sum;
640                    }
641                }
642                for (int j = 0; j < jWidth; ++j) {
643                    maxColSum = FastMath.max(maxColSum, colSums[j]);
644                }
645            }
646            return maxColSum;
647        }
648    
649        /** {@inheritDoc} */
650        @Override
651        public double getFrobeniusNorm() {
652            double sum2 = 0;
653            for (int blockIndex = 0; blockIndex < blocks.length; ++blockIndex) {
654                for (final double entry : blocks[blockIndex]) {
655                    sum2 += entry * entry;
656                }
657            }
658            return FastMath.sqrt(sum2);
659        }
660    
661        /** {@inheritDoc} */
662        @Override
663        public BlockRealMatrix getSubMatrix(final int startRow, final int endRow,
664                                            final int startColumn,
665                                            final int endColumn)
666            throws OutOfRangeException, NumberIsTooSmallException {
667            // safety checks
668            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
669    
670            // create the output matrix
671            final BlockRealMatrix out =
672                new BlockRealMatrix(endRow - startRow + 1, endColumn - startColumn + 1);
673    
674            // compute blocks shifts
675            final int blockStartRow = startRow / BLOCK_SIZE;
676            final int rowsShift = startRow % BLOCK_SIZE;
677            final int blockStartColumn = startColumn / BLOCK_SIZE;
678            final int columnsShift = startColumn % BLOCK_SIZE;
679    
680            // perform extraction block-wise, to ensure good cache behavior
681            int pBlock = blockStartRow;
682            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
683                final int iHeight = out.blockHeight(iBlock);
684                int qBlock = blockStartColumn;
685                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
686                    final int jWidth = out.blockWidth(jBlock);
687    
688                    // handle one block of the output matrix
689                    final int outIndex = iBlock * out.blockColumns + jBlock;
690                    final double[] outBlock = out.blocks[outIndex];
691                    final int index = pBlock * blockColumns + qBlock;
692                    final int width = blockWidth(qBlock);
693    
694                    final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
695                    final int widthExcess = jWidth + columnsShift - BLOCK_SIZE;
696                    if (heightExcess > 0) {
697                        // the submatrix block spans on two blocks rows from the original matrix
698                        if (widthExcess > 0) {
699                            // the submatrix block spans on two blocks columns from the original matrix
700                            final int width2 = blockWidth(qBlock + 1);
701                            copyBlockPart(blocks[index], width,
702                                          rowsShift, BLOCK_SIZE,
703                                          columnsShift, BLOCK_SIZE,
704                                          outBlock, jWidth, 0, 0);
705                            copyBlockPart(blocks[index + 1], width2,
706                                          rowsShift, BLOCK_SIZE,
707                                          0, widthExcess,
708                                          outBlock, jWidth, 0, jWidth - widthExcess);
709                            copyBlockPart(blocks[index + blockColumns], width,
710                                          0, heightExcess,
711                                          columnsShift, BLOCK_SIZE,
712                                          outBlock, jWidth, iHeight - heightExcess, 0);
713                            copyBlockPart(blocks[index + blockColumns + 1], width2,
714                                          0, heightExcess,
715                                          0, widthExcess,
716                                          outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
717                        } else {
718                            // the submatrix block spans on one block column from the original matrix
719                            copyBlockPart(blocks[index], width,
720                                          rowsShift, BLOCK_SIZE,
721                                          columnsShift, jWidth + columnsShift,
722                                          outBlock, jWidth, 0, 0);
723                            copyBlockPart(blocks[index + blockColumns], width,
724                                          0, heightExcess,
725                                          columnsShift, jWidth + columnsShift,
726                                          outBlock, jWidth, iHeight - heightExcess, 0);
727                        }
728                    } else {
729                        // the submatrix block spans on one block row from the original matrix
730                        if (widthExcess > 0) {
731                            // the submatrix block spans on two blocks columns from the original matrix
732                            final int width2 = blockWidth(qBlock + 1);
733                            copyBlockPart(blocks[index], width,
734                                          rowsShift, iHeight + rowsShift,
735                                          columnsShift, BLOCK_SIZE,
736                                          outBlock, jWidth, 0, 0);
737                            copyBlockPart(blocks[index + 1], width2,
738                                          rowsShift, iHeight + rowsShift,
739                                          0, widthExcess,
740                                          outBlock, jWidth, 0, jWidth - widthExcess);
741                        } else {
742                            // the submatrix block spans on one block column from the original matrix
743                            copyBlockPart(blocks[index], width,
744                                          rowsShift, iHeight + rowsShift,
745                                          columnsShift, jWidth + columnsShift,
746                                          outBlock, jWidth, 0, 0);
747                        }
748                   }
749                    ++qBlock;
750                }
751                ++pBlock;
752            }
753    
754            return out;
755        }
756    
757        /**
758         * Copy a part of a block into another one
759         * <p>This method can be called only when the specified part fits in both
760         * blocks, no verification is done here.</p>
761         * @param srcBlock source block
762         * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
763         * @param srcStartRow start row in the source block
764         * @param srcEndRow end row (exclusive) in the source block
765         * @param srcStartColumn start column in the source block
766         * @param srcEndColumn end column (exclusive) in the source block
767         * @param dstBlock destination block
768         * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
769         * @param dstStartRow start row in the destination block
770         * @param dstStartColumn start column in the destination block
771         */
772        private void copyBlockPart(final double[] srcBlock, final int srcWidth,
773                                   final int srcStartRow, final int srcEndRow,
774                                   final int srcStartColumn, final int srcEndColumn,
775                                   final double[] dstBlock, final int dstWidth,
776                                   final int dstStartRow, final int dstStartColumn) {
777            final int length = srcEndColumn - srcStartColumn;
778            int srcPos = srcStartRow * srcWidth + srcStartColumn;
779            int dstPos = dstStartRow * dstWidth + dstStartColumn;
780            for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
781                System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
782                srcPos += srcWidth;
783                dstPos += dstWidth;
784            }
785        }
786    
787        /** {@inheritDoc} */
788        @Override
789        public void setSubMatrix(final double[][] subMatrix, final int row,
790                                 final int column)
791            throws OutOfRangeException, NoDataException, NullArgumentException,
792            DimensionMismatchException {
793            // safety checks
794            MathUtils.checkNotNull(subMatrix);
795            final int refLength = subMatrix[0].length;
796            if (refLength == 0) {
797                throw new NoDataException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
798            }
799            final int endRow = row + subMatrix.length - 1;
800            final int endColumn = column + refLength - 1;
801            MatrixUtils.checkSubMatrixIndex(this, row, endRow, column, endColumn);
802            for (final double[] subRow : subMatrix) {
803                if (subRow.length != refLength) {
804                    throw new DimensionMismatchException(refLength, subRow.length);
805                }
806            }
807    
808            // compute blocks bounds
809            final int blockStartRow = row / BLOCK_SIZE;
810            final int blockEndRow = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
811            final int blockStartColumn = column / BLOCK_SIZE;
812            final int blockEndColumn = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
813    
814            // perform copy block-wise, to ensure good cache behavior
815            for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
816                final int iHeight = blockHeight(iBlock);
817                final int firstRow = iBlock * BLOCK_SIZE;
818                final int iStart = FastMath.max(row,    firstRow);
819                final int iEnd = FastMath.min(endRow + 1, firstRow + iHeight);
820    
821                for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
822                    final int jWidth = blockWidth(jBlock);
823                    final int firstColumn = jBlock * BLOCK_SIZE;
824                    final int jStart = FastMath.max(column,    firstColumn);
825                    final int jEnd = FastMath.min(endColumn + 1, firstColumn + jWidth);
826                    final int jLength = jEnd - jStart;
827    
828                    // handle one block, row by row
829                    final double[] block = blocks[iBlock * blockColumns + jBlock];
830                    for (int i = iStart; i < iEnd; ++i) {
831                        System.arraycopy(subMatrix[i - row], jStart - column,
832                                         block, (i - firstRow) * jWidth + (jStart - firstColumn),
833                                         jLength);
834                    }
835    
836                }
837            }
838        }
839    
840        /** {@inheritDoc} */
841        @Override
842        public BlockRealMatrix getRowMatrix(final int row)
843            throws OutOfRangeException {
844            MatrixUtils.checkRowIndex(this, row);
845            final BlockRealMatrix out = new BlockRealMatrix(1, columns);
846    
847            // perform copy block-wise, to ensure good cache behavior
848            final int iBlock = row / BLOCK_SIZE;
849            final int iRow = row - iBlock * BLOCK_SIZE;
850            int outBlockIndex = 0;
851            int outIndex = 0;
852            double[] outBlock = out.blocks[outBlockIndex];
853            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
854                final int jWidth = blockWidth(jBlock);
855                final double[] block = blocks[iBlock * blockColumns + jBlock];
856                final int available = outBlock.length - outIndex;
857                if (jWidth > available) {
858                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
859                    outBlock = out.blocks[++outBlockIndex];
860                    System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
861                    outIndex = jWidth - available;
862                } else {
863                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
864                    outIndex += jWidth;
865                }
866            }
867    
868            return out;
869        }
870    
871        /** {@inheritDoc} */
872        @Override
873        public void setRowMatrix(final int row, final RealMatrix matrix)
874            throws OutOfRangeException, MatrixDimensionMismatchException {
875            try {
876                setRowMatrix(row, (BlockRealMatrix) matrix);
877            } catch (ClassCastException cce) {
878                super.setRowMatrix(row, matrix);
879            }
880        }
881    
882        /**
883         * Sets the entries in row number <code>row</code>
884         * as a row matrix.  Row indices start at 0.
885         *
886         * @param row the row to be set
887         * @param matrix row matrix (must have one row and the same number of columns
888         * as the instance)
889         * @throws OutOfRangeException if the specified row index is invalid.
890         * @throws MatrixDimensionMismatchException if the matrix dimensions do
891         * not match one instance row.
892         */
893        public void setRowMatrix(final int row, final BlockRealMatrix matrix)
894            throws OutOfRangeException, MatrixDimensionMismatchException {
895            MatrixUtils.checkRowIndex(this, row);
896            final int nCols = getColumnDimension();
897            if ((matrix.getRowDimension() != 1) ||
898                (matrix.getColumnDimension() != nCols)) {
899                throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
900                                                           matrix.getColumnDimension(),
901                                                           1, nCols);
902            }
903    
904            // perform copy block-wise, to ensure good cache behavior
905            final int iBlock = row / BLOCK_SIZE;
906            final int iRow = row - iBlock * BLOCK_SIZE;
907            int mBlockIndex = 0;
908            int mIndex = 0;
909            double[] mBlock = matrix.blocks[mBlockIndex];
910            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
911                final int jWidth = blockWidth(jBlock);
912                final double[] block = blocks[iBlock * blockColumns + jBlock];
913                final int available  = mBlock.length - mIndex;
914                if (jWidth > available) {
915                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
916                    mBlock = matrix.blocks[++mBlockIndex];
917                    System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
918                    mIndex = jWidth - available;
919                } else {
920                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
921                    mIndex += jWidth;
922               }
923            }
924        }
925    
926        /** {@inheritDoc} */
927        @Override
928        public BlockRealMatrix getColumnMatrix(final int column)
929            throws OutOfRangeException {
930            MatrixUtils.checkColumnIndex(this, column);
931            final BlockRealMatrix out = new BlockRealMatrix(rows, 1);
932    
933            // perform copy block-wise, to ensure good cache behavior
934            final int jBlock = column / BLOCK_SIZE;
935            final int jColumn = column - jBlock * BLOCK_SIZE;
936            final int jWidth = blockWidth(jBlock);
937            int outBlockIndex = 0;
938            int outIndex = 0;
939            double[] outBlock = out.blocks[outBlockIndex];
940            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
941                final int iHeight = blockHeight(iBlock);
942                final double[] block = blocks[iBlock * blockColumns + jBlock];
943                for (int i = 0; i < iHeight; ++i) {
944                    if (outIndex >= outBlock.length) {
945                        outBlock = out.blocks[++outBlockIndex];
946                        outIndex = 0;
947                    }
948                    outBlock[outIndex++] = block[i * jWidth + jColumn];
949                }
950            }
951    
952            return out;
953        }
954    
955        /** {@inheritDoc} */
956        @Override
957        public void setColumnMatrix(final int column, final RealMatrix matrix)
958            throws OutOfRangeException, MatrixDimensionMismatchException {
959            try {
960                setColumnMatrix(column, (BlockRealMatrix) matrix);
961            } catch (ClassCastException cce) {
962                super.setColumnMatrix(column, matrix);
963            }
964        }
965    
966        /**
967         * Sets the entries in column number <code>column</code>
968         * as a column matrix.  Column indices start at 0.
969         *
970         * @param column the column to be set
971         * @param matrix column matrix (must have one column and the same number of rows
972         * as the instance)
973         * @throws OutOfRangeException if the specified column index is invalid.
974         * @throws MatrixDimensionMismatchException if the matrix dimensions do
975         * not match one instance column.
976         */
977        void setColumnMatrix(final int column, final BlockRealMatrix matrix)
978            throws OutOfRangeException, MatrixDimensionMismatchException {
979            MatrixUtils.checkColumnIndex(this, column);
980            final int nRows = getRowDimension();
981            if ((matrix.getRowDimension() != nRows) ||
982                (matrix.getColumnDimension() != 1)) {
983                throw new MatrixDimensionMismatchException(matrix.getRowDimension(),
984                                                           matrix.getColumnDimension(),
985                                                           nRows, 1);
986            }
987    
988            // perform copy block-wise, to ensure good cache behavior
989            final int jBlock = column / BLOCK_SIZE;
990            final int jColumn = column - jBlock * BLOCK_SIZE;
991            final int jWidth = blockWidth(jBlock);
992            int mBlockIndex = 0;
993            int mIndex = 0;
994            double[] mBlock = matrix.blocks[mBlockIndex];
995            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
996                final int iHeight = blockHeight(iBlock);
997                final double[] block = blocks[iBlock * blockColumns + jBlock];
998                for (int i = 0; i < iHeight; ++i) {
999                    if (mIndex >= mBlock.length) {
1000                        mBlock = matrix.blocks[++mBlockIndex];
1001                        mIndex = 0;
1002                    }
1003                    block[i * jWidth + jColumn] = mBlock[mIndex++];
1004                }
1005            }
1006        }
1007    
1008        /** {@inheritDoc} */
1009        @Override
1010        public RealVector getRowVector(final int row)
1011            throws OutOfRangeException {
1012            MatrixUtils.checkRowIndex(this, row);
1013            final double[] outData = new double[columns];
1014    
1015            // perform copy block-wise, to ensure good cache behavior
1016            final int iBlock = row / BLOCK_SIZE;
1017            final int iRow = row - iBlock * BLOCK_SIZE;
1018            int outIndex = 0;
1019            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1020                final int jWidth = blockWidth(jBlock);
1021                final double[] block = blocks[iBlock * blockColumns + jBlock];
1022                System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1023                outIndex += jWidth;
1024            }
1025    
1026            return new ArrayRealVector(outData, false);
1027        }
1028    
1029        /** {@inheritDoc} */
1030        @Override
1031        public void setRowVector(final int row, final RealVector vector)
1032            throws OutOfRangeException, MatrixDimensionMismatchException {
1033            try {
1034                setRow(row, ((ArrayRealVector) vector).getDataRef());
1035            } catch (ClassCastException cce) {
1036                super.setRowVector(row, vector);
1037            }
1038        }
1039    
1040        /** {@inheritDoc} */
1041        @Override
1042        public RealVector getColumnVector(final int column)
1043            throws OutOfRangeException {
1044            MatrixUtils.checkColumnIndex(this, column);
1045            final double[] outData = new double[rows];
1046    
1047            // perform copy block-wise, to ensure good cache behavior
1048            final int jBlock = column / BLOCK_SIZE;
1049            final int jColumn = column - jBlock * BLOCK_SIZE;
1050            final int jWidth = blockWidth(jBlock);
1051            int outIndex = 0;
1052            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1053                final int iHeight = blockHeight(iBlock);
1054                final double[] block = blocks[iBlock * blockColumns + jBlock];
1055                for (int i = 0; i < iHeight; ++i) {
1056                    outData[outIndex++] = block[i * jWidth + jColumn];
1057                }
1058            }
1059    
1060            return new ArrayRealVector(outData, false);
1061        }
1062    
1063        /** {@inheritDoc} */
1064        @Override
1065        public void setColumnVector(final int column, final RealVector vector)
1066            throws OutOfRangeException, MatrixDimensionMismatchException {
1067            try {
1068                setColumn(column, ((ArrayRealVector) vector).getDataRef());
1069            } catch (ClassCastException cce) {
1070                super.setColumnVector(column, vector);
1071            }
1072        }
1073    
1074        /** {@inheritDoc} */
1075        @Override
1076        public double[] getRow(final int row) throws OutOfRangeException {
1077            MatrixUtils.checkRowIndex(this, row);
1078            final double[] out = new double[columns];
1079    
1080            // perform copy block-wise, to ensure good cache behavior
1081            final int iBlock = row / BLOCK_SIZE;
1082            final int iRow = row - iBlock * BLOCK_SIZE;
1083            int outIndex = 0;
1084            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1085                final int jWidth     = blockWidth(jBlock);
1086                final double[] block = blocks[iBlock * blockColumns + jBlock];
1087                System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1088                outIndex += jWidth;
1089            }
1090    
1091            return out;
1092        }
1093    
1094        /** {@inheritDoc} */
1095        @Override
1096        public void setRow(final int row, final double[] array)
1097            throws OutOfRangeException, MatrixDimensionMismatchException {
1098            MatrixUtils.checkRowIndex(this, row);
1099            final int nCols = getColumnDimension();
1100            if (array.length != nCols) {
1101                throw new MatrixDimensionMismatchException(1, array.length, 1, nCols);
1102            }
1103    
1104            // perform copy block-wise, to ensure good cache behavior
1105            final int iBlock = row / BLOCK_SIZE;
1106            final int iRow = row - iBlock * BLOCK_SIZE;
1107            int outIndex = 0;
1108            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1109                final int jWidth     = blockWidth(jBlock);
1110                final double[] block = blocks[iBlock * blockColumns + jBlock];
1111                System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1112                outIndex += jWidth;
1113            }
1114        }
1115    
1116        /** {@inheritDoc} */
1117        @Override
1118        public double[] getColumn(final int column) throws OutOfRangeException {
1119            MatrixUtils.checkColumnIndex(this, column);
1120            final double[] out = new double[rows];
1121    
1122            // perform copy block-wise, to ensure good cache behavior
1123            final int jBlock  = column / BLOCK_SIZE;
1124            final int jColumn = column - jBlock * BLOCK_SIZE;
1125            final int jWidth  = blockWidth(jBlock);
1126            int outIndex = 0;
1127            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1128                final int iHeight = blockHeight(iBlock);
1129                final double[] block = blocks[iBlock * blockColumns + jBlock];
1130                for (int i = 0; i < iHeight; ++i) {
1131                    out[outIndex++] = block[i * jWidth + jColumn];
1132                }
1133            }
1134    
1135            return out;
1136        }
1137    
1138        /** {@inheritDoc} */
1139        @Override
1140        public void setColumn(final int column, final double[] array)
1141            throws OutOfRangeException, MatrixDimensionMismatchException {
1142            MatrixUtils.checkColumnIndex(this, column);
1143            final int nRows = getRowDimension();
1144            if (array.length != nRows) {
1145                throw new MatrixDimensionMismatchException(array.length, 1, nRows, 1);
1146            }
1147    
1148            // perform copy block-wise, to ensure good cache behavior
1149            final int jBlock  = column / BLOCK_SIZE;
1150            final int jColumn = column - jBlock * BLOCK_SIZE;
1151            final int jWidth = blockWidth(jBlock);
1152            int outIndex = 0;
1153            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1154                final int iHeight = blockHeight(iBlock);
1155                final double[] block = blocks[iBlock * blockColumns + jBlock];
1156                for (int i = 0; i < iHeight; ++i) {
1157                    block[i * jWidth + jColumn] = array[outIndex++];
1158                }
1159            }
1160        }
1161    
1162        /** {@inheritDoc} */
1163        @Override
1164        public double getEntry(final int row, final int column)
1165            throws OutOfRangeException {
1166            MatrixUtils.checkMatrixIndex(this, row, column);
1167            final int iBlock = row / BLOCK_SIZE;
1168            final int jBlock = column / BLOCK_SIZE;
1169            final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1170                (column - jBlock * BLOCK_SIZE);
1171            return blocks[iBlock * blockColumns + jBlock][k];
1172        }
1173    
1174        /** {@inheritDoc} */
1175        @Override
1176        public void setEntry(final int row, final int column, final double value)
1177            throws OutOfRangeException {
1178            MatrixUtils.checkMatrixIndex(this, row, column);
1179            final int iBlock = row / BLOCK_SIZE;
1180            final int jBlock = column / BLOCK_SIZE;
1181            final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1182                (column - jBlock * BLOCK_SIZE);
1183            blocks[iBlock * blockColumns + jBlock][k] = value;
1184        }
1185    
1186        /** {@inheritDoc} */
1187        @Override
1188        public void addToEntry(final int row, final int column,
1189                               final double increment)
1190            throws OutOfRangeException {
1191            MatrixUtils.checkMatrixIndex(this, row, column);
1192            final int iBlock = row    / BLOCK_SIZE;
1193            final int jBlock = column / BLOCK_SIZE;
1194            final int k = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1195                (column - jBlock * BLOCK_SIZE);
1196            blocks[iBlock * blockColumns + jBlock][k] += increment;
1197        }
1198    
1199        /** {@inheritDoc} */
1200        @Override
1201        public void multiplyEntry(final int row, final int column,
1202                                  final double factor)
1203            throws OutOfRangeException {
1204            MatrixUtils.checkMatrixIndex(this, row, column);
1205            final int iBlock = row / BLOCK_SIZE;
1206            final int jBlock = column / BLOCK_SIZE;
1207            final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1208                (column - jBlock * BLOCK_SIZE);
1209            blocks[iBlock * blockColumns + jBlock][k] *= factor;
1210        }
1211    
1212        /** {@inheritDoc} */
1213        @Override
1214        public BlockRealMatrix transpose() {
1215            final int nRows = getRowDimension();
1216            final int nCols = getColumnDimension();
1217            final BlockRealMatrix out = new BlockRealMatrix(nCols, nRows);
1218    
1219            // perform transpose block-wise, to ensure good cache behavior
1220            int blockIndex = 0;
1221            for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1222                for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1223                    // transpose current block
1224                    final double[] outBlock = out.blocks[blockIndex];
1225                    final double[] tBlock = blocks[jBlock * blockColumns + iBlock];
1226                    final int pStart = iBlock * BLOCK_SIZE;
1227                    final int pEnd = FastMath.min(pStart + BLOCK_SIZE, columns);
1228                    final int qStart = jBlock * BLOCK_SIZE;
1229                    final int qEnd = FastMath.min(qStart + BLOCK_SIZE, rows);
1230                    int k = 0;
1231                    for (int p = pStart; p < pEnd; ++p) {
1232                        final int lInc = pEnd - pStart;
1233                        int l = p - pStart;
1234                        for (int q = qStart; q < qEnd; ++q) {
1235                            outBlock[k] = tBlock[l];
1236                            ++k;
1237                            l+= lInc;
1238                        }
1239                    }
1240                    // go to next block
1241                    ++blockIndex;
1242                }
1243            }
1244    
1245            return out;
1246        }
1247    
1248        /** {@inheritDoc} */
1249        @Override
1250        public int getRowDimension() {
1251            return rows;
1252        }
1253    
1254        /** {@inheritDoc} */
1255        @Override
1256        public int getColumnDimension() {
1257            return columns;
1258        }
1259    
1260        /** {@inheritDoc} */
1261        @Override
1262        public double[] operate(final double[] v)
1263            throws DimensionMismatchException {
1264            if (v.length != columns) {
1265                throw new DimensionMismatchException(v.length, columns);
1266            }
1267            final double[] out = new double[rows];
1268    
1269            // perform multiplication block-wise, to ensure good cache behavior
1270            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1271                final int pStart = iBlock * BLOCK_SIZE;
1272                final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1273                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1274                    final double[] block  = blocks[iBlock * blockColumns + jBlock];
1275                    final int qStart = jBlock * BLOCK_SIZE;
1276                    final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1277                    int k = 0;
1278                    for (int p = pStart; p < pEnd; ++p) {
1279                        double sum = 0;
1280                        int q = qStart;
1281                        while (q < qEnd - 3) {
1282                            sum += block[k]     * v[q]     +
1283                                   block[k + 1] * v[q + 1] +
1284                                   block[k + 2] * v[q + 2] +
1285                                   block[k + 3] * v[q + 3];
1286                            k += 4;
1287                            q += 4;
1288                        }
1289                        while (q < qEnd) {
1290                            sum += block[k++] * v[q++];
1291                        }
1292                        out[p] += sum;
1293                    }
1294                }
1295            }
1296    
1297            return out;
1298        }
1299    
1300        /** {@inheritDoc} */
1301        @Override
1302        public double[] preMultiply(final double[] v)
1303            throws DimensionMismatchException {
1304            if (v.length != rows) {
1305                throw new DimensionMismatchException(v.length, rows);
1306            }
1307            final double[] out = new double[columns];
1308    
1309            // perform multiplication block-wise, to ensure good cache behavior
1310            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1311                final int jWidth  = blockWidth(jBlock);
1312                final int jWidth2 = jWidth  + jWidth;
1313                final int jWidth3 = jWidth2 + jWidth;
1314                final int jWidth4 = jWidth3 + jWidth;
1315                final int qStart = jBlock * BLOCK_SIZE;
1316                final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1317                for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1318                    final double[] block  = blocks[iBlock * blockColumns + jBlock];
1319                    final int pStart = iBlock * BLOCK_SIZE;
1320                    final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1321                    for (int q = qStart; q < qEnd; ++q) {
1322                        int k = q - qStart;
1323                        double sum = 0;
1324                        int p = pStart;
1325                        while (p < pEnd - 3) {
1326                            sum += block[k]           * v[p]     +
1327                                   block[k + jWidth]  * v[p + 1] +
1328                                   block[k + jWidth2] * v[p + 2] +
1329                                   block[k + jWidth3] * v[p + 3];
1330                            k += jWidth4;
1331                            p += 4;
1332                        }
1333                        while (p < pEnd) {
1334                            sum += block[k] * v[p++];
1335                            k += jWidth;
1336                        }
1337                        out[q] += sum;
1338                    }
1339                }
1340            }
1341    
1342            return out;
1343        }
1344    
1345        /** {@inheritDoc} */
1346        @Override
1347        public double walkInRowOrder(final RealMatrixChangingVisitor visitor) {
1348            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1349            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1350                final int pStart = iBlock * BLOCK_SIZE;
1351                final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1352                for (int p = pStart; p < pEnd; ++p) {
1353                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1354                        final int jWidth = blockWidth(jBlock);
1355                        final int qStart = jBlock * BLOCK_SIZE;
1356                        final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1357                        final double[] block = blocks[iBlock * blockColumns + jBlock];
1358                        int k = (p - pStart) * jWidth;
1359                        for (int q = qStart; q < qEnd; ++q) {
1360                            block[k] = visitor.visit(p, q, block[k]);
1361                            ++k;
1362                        }
1363                    }
1364                 }
1365            }
1366            return visitor.end();
1367        }
1368    
1369        /** {@inheritDoc} */
1370        @Override
1371        public double walkInRowOrder(final RealMatrixPreservingVisitor visitor) {
1372            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1373            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1374                final int pStart = iBlock * BLOCK_SIZE;
1375                final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1376                for (int p = pStart; p < pEnd; ++p) {
1377                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1378                        final int jWidth = blockWidth(jBlock);
1379                        final int qStart = jBlock * BLOCK_SIZE;
1380                        final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1381                        final double[] block = blocks[iBlock * blockColumns + jBlock];
1382                        int k = (p - pStart) * jWidth;
1383                        for (int q = qStart; q < qEnd; ++q) {
1384                            visitor.visit(p, q, block[k]);
1385                            ++k;
1386                        }
1387                    }
1388                 }
1389            }
1390            return visitor.end();
1391        }
1392    
1393        /** {@inheritDoc} */
1394        @Override
1395        public double walkInRowOrder(final RealMatrixChangingVisitor visitor,
1396                                     final int startRow, final int endRow,
1397                                     final int startColumn, final int endColumn)
1398            throws OutOfRangeException, NumberIsTooSmallException {
1399            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1400            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1401            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1402                final int p0 = iBlock * BLOCK_SIZE;
1403                final int pStart = FastMath.max(startRow, p0);
1404                final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1405                for (int p = pStart; p < pEnd; ++p) {
1406                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1407                        final int jWidth = blockWidth(jBlock);
1408                        final int q0 = jBlock * BLOCK_SIZE;
1409                        final int qStart = FastMath.max(startColumn, q0);
1410                        final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1411                        final double[] block = blocks[iBlock * blockColumns + jBlock];
1412                        int k = (p - p0) * jWidth + qStart - q0;
1413                        for (int q = qStart; q < qEnd; ++q) {
1414                            block[k] = visitor.visit(p, q, block[k]);
1415                            ++k;
1416                        }
1417                    }
1418                 }
1419            }
1420            return visitor.end();
1421        }
1422    
1423        /** {@inheritDoc} */
1424        @Override
1425        public double walkInRowOrder(final RealMatrixPreservingVisitor visitor,
1426                                     final int startRow, final int endRow,
1427                                     final int startColumn, final int endColumn)
1428            throws OutOfRangeException, NumberIsTooSmallException {
1429            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1430            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1431            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1432                final int p0 = iBlock * BLOCK_SIZE;
1433                final int pStart = FastMath.max(startRow, p0);
1434                final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1435                for (int p = pStart; p < pEnd; ++p) {
1436                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1437                        final int jWidth = blockWidth(jBlock);
1438                        final int q0 = jBlock * BLOCK_SIZE;
1439                        final int qStart = FastMath.max(startColumn, q0);
1440                        final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1441                        final double[] block = blocks[iBlock * blockColumns + jBlock];
1442                        int k = (p - p0) * jWidth + qStart - q0;
1443                        for (int q = qStart; q < qEnd; ++q) {
1444                            visitor.visit(p, q, block[k]);
1445                            ++k;
1446                        }
1447                    }
1448                 }
1449            }
1450            return visitor.end();
1451        }
1452    
1453        /** {@inheritDoc} */
1454        @Override
1455        public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor) {
1456            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1457            int blockIndex = 0;
1458            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1459                final int pStart = iBlock * BLOCK_SIZE;
1460                final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1461                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1462                    final int qStart = jBlock * BLOCK_SIZE;
1463                    final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1464                    final double[] block = blocks[blockIndex];
1465                    int k = 0;
1466                    for (int p = pStart; p < pEnd; ++p) {
1467                        for (int q = qStart; q < qEnd; ++q) {
1468                            block[k] = visitor.visit(p, q, block[k]);
1469                            ++k;
1470                        }
1471                    }
1472                    ++blockIndex;
1473                }
1474            }
1475            return visitor.end();
1476        }
1477    
1478        /** {@inheritDoc} */
1479        @Override
1480        public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor) {
1481            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1482            int blockIndex = 0;
1483            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1484                final int pStart = iBlock * BLOCK_SIZE;
1485                final int pEnd = FastMath.min(pStart + BLOCK_SIZE, rows);
1486                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1487                    final int qStart = jBlock * BLOCK_SIZE;
1488                    final int qEnd = FastMath.min(qStart + BLOCK_SIZE, columns);
1489                    final double[] block = blocks[blockIndex];
1490                    int k = 0;
1491                    for (int p = pStart; p < pEnd; ++p) {
1492                        for (int q = qStart; q < qEnd; ++q) {
1493                            visitor.visit(p, q, block[k]);
1494                            ++k;
1495                        }
1496                    }
1497                    ++blockIndex;
1498                }
1499            }
1500            return visitor.end();
1501        }
1502    
1503        /** {@inheritDoc} */
1504        @Override
1505        public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor,
1506                                           final int startRow, final int endRow,
1507                                           final int startColumn,
1508                                           final int endColumn)
1509            throws OutOfRangeException, NumberIsTooSmallException {
1510            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1511            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1512            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1513                final int p0 = iBlock * BLOCK_SIZE;
1514                final int pStart = FastMath.max(startRow, p0);
1515                final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1516                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1517                    final int jWidth = blockWidth(jBlock);
1518                    final int q0 = jBlock * BLOCK_SIZE;
1519                    final int qStart = FastMath.max(startColumn, q0);
1520                    final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1521                    final double[] block = blocks[iBlock * blockColumns + jBlock];
1522                    for (int p = pStart; p < pEnd; ++p) {
1523                        int k = (p - p0) * jWidth + qStart - q0;
1524                        for (int q = qStart; q < qEnd; ++q) {
1525                            block[k] = visitor.visit(p, q, block[k]);
1526                            ++k;
1527                        }
1528                    }
1529                }
1530            }
1531            return visitor.end();
1532        }
1533    
1534        /** {@inheritDoc} */
1535        @Override
1536        public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor,
1537                                           final int startRow, final int endRow,
1538                                           final int startColumn,
1539                                           final int endColumn)
1540            throws OutOfRangeException, NumberIsTooSmallException {
1541            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1542            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1543            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1544                final int p0 = iBlock * BLOCK_SIZE;
1545                final int pStart = FastMath.max(startRow, p0);
1546                final int pEnd = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1547                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1548                    final int jWidth = blockWidth(jBlock);
1549                    final int q0 = jBlock * BLOCK_SIZE;
1550                    final int qStart = FastMath.max(startColumn, q0);
1551                    final int qEnd = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1552                    final double[] block = blocks[iBlock * blockColumns + jBlock];
1553                    for (int p = pStart; p < pEnd; ++p) {
1554                        int k = (p - p0) * jWidth + qStart - q0;
1555                        for (int q = qStart; q < qEnd; ++q) {
1556                            visitor.visit(p, q, block[k]);
1557                            ++k;
1558                        }
1559                    }
1560                }
1561            }
1562            return visitor.end();
1563        }
1564    
1565        /**
1566         * Get the height of a block.
1567         * @param blockRow row index (in block sense) of the block
1568         * @return height (number of rows) of the block
1569         */
1570        private int blockHeight(final int blockRow) {
1571            return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1572        }
1573    
1574        /**
1575         * Get the width of a block.
1576         * @param blockColumn column index (in block sense) of the block
1577         * @return width (number of columns) of the block
1578         */
1579        private int blockWidth(final int blockColumn) {
1580            return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1581        }
1582    }