1 /**
2 * Copyright 2009 The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20 package org.apache.hadoop.hbase.regionserver.wal;
21
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.Writable;
34
35 /**
36 * WALEdit: Used in HBase's transaction log (WAL) to represent
37 * the collection of edits (KeyValue objects) corresponding to a
38 * single transaction. The class implements "Writable" interface
39 * for serializing/deserializing a set of KeyValue items.
40 *
41 * Previously, if a transaction contains 3 edits to c1, c2, c3 for a row R,
42 * the HLog would have three log entries as follows:
43 *
44 * <logseq1-for-edit1>:<KeyValue-for-edit-c1>
45 * <logseq2-for-edit2>:<KeyValue-for-edit-c2>
46 * <logseq3-for-edit3>:<KeyValue-for-edit-c3>
47 *
48 * This presents problems because row level atomicity of transactions
49 * was not guaranteed. If we crash after few of the above appends make
50 * it, then recovery will restore a partial transaction.
51 *
52 * In the new world, all the edits for a given transaction are written
53 * out as a single record, for example:
54 *
55 * <logseq#-for-entire-txn>:<WALEdit-for-entire-txn>
56 *
57 * where, the WALEdit is serialized as:
58 * <-1, # of edits, <KeyValue>, <KeyValue>, ... >
59 * For example:
60 * <-1, 3, <Keyvalue-for-edit-c1>, <KeyValue-for-edit-c2>, <KeyValue-for-edit-c3>>
61 *
62 * The -1 marker is just a special way of being backward compatible with
63 * an old HLog which would have contained a single <KeyValue>.
64 *
65 * The deserializer for WALEdit backward compatibly detects if the record
66 * is an old style KeyValue or the new style WALEdit.
67 *
68 */
69 public class WALEdit implements Writable {
70
71 private final int VERSION_2 = -1;
72
73 private final ArrayList<KeyValue> kvs = new ArrayList<KeyValue>();
74 private NavigableMap<byte[], Integer> scopes;
75
76 public WALEdit() {
77 }
78
79 public void add(KeyValue kv) {
80 this.kvs.add(kv);
81 }
82
83 public boolean isEmpty() {
84 return kvs.isEmpty();
85 }
86
87 public int size() {
88 return kvs.size();
89 }
90
91 public List<KeyValue> getKeyValues() {
92 return kvs;
93 }
94
95 public NavigableMap<byte[], Integer> getScopes() {
96 return scopes;
97 }
98
99
100 public void setScopes (NavigableMap<byte[], Integer> scopes) {
101 // We currently process the map outside of WALEdit,
102 // TODO revisit when replication is part of core
103 this.scopes = scopes;
104 }
105
106 public void readFields(DataInput in) throws IOException {
107 kvs.clear();
108 if (scopes != null) {
109 scopes.clear();
110 }
111 int versionOrLength = in.readInt();
112 if (versionOrLength == VERSION_2) {
113 // this is new style HLog entry containing multiple KeyValues.
114 int numEdits = in.readInt();
115 for (int idx = 0; idx < numEdits; idx++) {
116 KeyValue kv = new KeyValue();
117 kv.readFields(in);
118 this.add(kv);
119 }
120 int numFamilies = in.readInt();
121 if (numFamilies > 0) {
122 if (scopes == null) {
123 scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
124 }
125 for (int i = 0; i < numFamilies; i++) {
126 byte[] fam = Bytes.readByteArray(in);
127 int scope = in.readInt();
128 scopes.put(fam, scope);
129 }
130 }
131 } else {
132 // this is an old style HLog entry. The int that we just
133 // read is actually the length of a single KeyValue.
134 KeyValue kv = new KeyValue();
135 kv.readFields(versionOrLength, in);
136 this.add(kv);
137 }
138
139 }
140
141 public void write(DataOutput out) throws IOException {
142 out.writeInt(VERSION_2);
143 out.writeInt(kvs.size());
144 // We interleave the two lists for code simplicity
145 for (KeyValue kv : kvs) {
146 kv.write(out);
147 }
148 if (scopes == null) {
149 out.writeInt(0);
150 } else {
151 out.writeInt(scopes.size());
152 for (byte[] key : scopes.keySet()) {
153 Bytes.writeByteArray(out, key);
154 out.writeInt(scopes.get(key));
155 }
156 }
157
158 }
159
160 public String toString() {
161 StringBuilder sb = new StringBuilder();
162
163 sb.append("[#edits: " + kvs.size() + " = <");
164 for (KeyValue kv : kvs) {
165 sb.append(kv.toString());
166 sb.append("; ");
167 }
168 if (scopes != null) {
169 sb.append(" scopes: " + scopes.toString());
170 }
171 sb.append(">]");
172 return sb.toString();
173 }
174
175 }