Design a data structure to implement a simplified version of an Excel sheet. Implement the Excel class:
Excel(int height, char width)
Initializes the object with the height and width of the sheet. The sheet is an H x W grid consisting of integer cells. The height is a positive integer, and the width is a character representing the column id. For example, if width = 'C', then the corresponding width is 3.void set(int row, char column, int val)
Sets the value at the given row and column to val. Row and column are 1-indexed.int get(int row, char column)
Returns the value at the given row and column. Row and column are 1-indexed.int sum(int row, char column, List<String> formula)
Sets the value at the given row and column to be the sum of the cells described by the formula. Row and column are 1-indexed. The formula is a list of strings that represent cells or ranges. Each string can be one of the following:
Example:
Excel excel = new Excel(3, "C");
// A B C
// 1 0 0 0
// 2 0 0 0
// 3 0 0 0
excel.set(1, "A", 2);
// A B C
// 1 2 0 0
// 2 0 0 0
// 3 0 0 0
excel.sum(3, "C", ["A1", "A1:B2"]); // Should return 4.
//The cell C3 should be equal to the sum of cell A1 and all the cells in the A1:B2 range. 2+0+0+0 = 2.
// A B C
// 1 2 0 0
// 2 0 0 0
// 3 2 0 2
excel.get(3, "C"); // returns 2
excel.set(2, "B", 2);
// A B C
// 1 2 0 0
// 2 0 2 0
// 3 2 0 2
excel.sum(3, "C", ["A1", "A1:B2"]); // Should return 6.
excel.get(3, "C"); // returns 6
Clarifying questions to consider asking:
A naive solution would involve storing the value of each cell in a 2D array. When a SUM
formula is encountered, iterate through the specified range of cells and calculate the sum. The primary data structure will be a 2D array (or a dictionary/hashmap). Each cell in excel is represented by its row and column. We must store the value for each cell and keep track of the formulas.
class Excel:
def __init__(self, height: int, width: str):
self.height = height
self.width = ord(width) - ord('A') + 1
self.grid = [[0] * self.width for _ in range(self.height)]
self.formulas = [[None] * self.width for _ in range(self.height)]
def set(self, row: int, column: str, val: int) -> None:
row -= 1
column = ord(column) - ord('A')
self.grid[row][column] = val
self.formulas[row][column] = None # Clear any existing formula
def get(self, row: int, column: str) -> int:
row -= 1
column = ord(column) - ord('A')
if self.formulas[row][column]:
self.grid[row][column] = self.evaluate(row, column)
return self.grid[row][column]
def sum(self, row: int, column: str, formula: List[str]) -> None:
row -= 1
column = ord(column) - ord('A')
self.formulas[row][column] = formula
self.grid[row][column] = self.evaluate(row, column)
def evaluate(self, row, col):
if not self.formulas[row][col]:
return self.grid[row][col]
formula = self.formulas[row][col]
total = 0
for cell_def in formula:
if ':' in cell_def:
start, end = cell_def.split(':')
start_row = int(start[1:]) - 1
start_col = ord(start[0]) - ord('A')
end_row = int(end[1:]) - 1
end_col = ord(end[0]) - ord('A')
for r in range(start_row, end_row + 1):
for c in range(start_col, end_col + 1):
total += self.get(r + 1, chr(c + ord('A')))
else:
cell_row = int(cell_def[1:])
cell_col = ord(cell_def[0]) - ord('A')
total += self.get(cell_row, cell_def[0])
return total
# Your Excel object will be instantiated and called as such:
# obj = Excel(height, width)
# obj.set(row,column,val)
# param_2 = obj.get(row,column)
# obj.sum(row,column,formula)
set()
: O(1)get()
: O(V), where V is the number of cells in the formula's rangesum()
: O(V)The optimal solution uses a dependency graph to keep track of the dependencies between cells. This avoids recomputing sums unnecessarily and allows for efficient updates when a cell's value changes. The crucial insight is that when a cell changes, all the cells that depend on it also need to be updated.
grid
: A 2D array (or a dictionary) to store cell values. This is similar to the naive approach.formula
: A 2D array (or a dictionary) to store the formula of each cell, if any. Store the cell name dependencies for each cell in the spreadsheet.dependencies
: A dictionary that stores the cells that depend on a given cell. For example, dependencies['A1']
would be a set of cells that have a formula that includes cell A1
.set(row, column, val)
:
val
.set
function.get(row, column)
:
sum(row, column, formula)
:
class Excel:
def __init__(self, height: int, width: str):
self.height = height
self.width = ord(width) - ord('A') + 1
self.grid = [[0] * self.width for _ in range(self.height)]
self.formulas = [[None] * self.width for _ in range(self.height)]
self.dependencies = {}
def set(self, row: int, column: str, val: int) -> None:
row -= 1
column = ord(column) - ord('A')
self.grid[row][column] = val
self.clear_dependencies(row, column)
self.formulas[row][column] = None
self.update_dependents(row, column)
def get(self, row: int, column: str) -> int:
row -= 1
column = ord(column) - ord('A')
return self.grid[row][column]
def sum(self, row: int, column: str, formula: List[str]) -> None:
row -= 1
column = ord(column) - ord('A')
self.formulas[row][column] = formula
self.update_dependencies(row, column, formula)
self.grid[row][column] = self.evaluate(row, column)
self.update_dependents(row, column)
def evaluate(self, row, col):
if not self.formulas[row][col]:
return self.grid[row][col]
formula = self.formulas[row][col]
total = 0
for cell_def in formula:
if ':' in cell_def:
start, end = cell_def.split(':')
start_row = int(start[1:]) - 1
start_col = ord(start[0]) - ord('A')
end_row = int(end[1:]) - 1
end_col = ord(end[0]) - ord('A')
for r in range(start_row, end_row + 1):
for c in range(start_col, end_col + 1):
total += self.get(r + 1, chr(c + ord('A')))
else:
cell_row = int(cell_def[1:]) - 1
cell_col = ord(cell_def[0]) - ord('A')
total += self.get(cell_row + 1, cell_def[0])
return total
def update_dependencies(self, row, col, formula):
cell_name = chr(col + ord('A')) + str(row + 1)
for cell_def in formula:
if ':' in cell_def:
start, end = cell_def.split(':')
start_row = int(start[1:]) - 1
start_col = ord(start[0]) - ord('A')
end_row = int(end[1:]) - 1
end_col = ord(end[0]) - ord('A')
for r in range(start_row, end_row + 1):
for c in range(start_col, end_col + 1):
dep_cell = chr(c + ord('A')) + str(r + 1)
if dep_cell not in self.dependencies:
self.dependencies[dep_cell] = set()
self.dependencies[dep_cell].add(cell_name)
else:
if cell_def not in self.dependencies:
self.dependencies[cell_def] = set()
self.dependencies[cell_def].add(cell_name)
def clear_dependencies(self, row, col):
cell_name = chr(col + ord('A')) + str(row + 1)
for dep_cell, dependent_cells in self.dependencies.items():
if cell_name in dependent_cells:
dependent_cells.remove(cell_name)
def update_dependents(self, row, col):
cell_name = chr(col + ord('A')) + str(row + 1)
if cell_name in self.dependencies:
for dependent_cell in self.dependencies[cell_name]:
dep_col = ord(dependent_cell[0]) - ord('A')
dep_row = int(dependent_cell[1:]) - 1
self.grid[dep_row][dep_col] = self.evaluate(dep_row, dep_col)
self.update_dependents(dep_row, dep_col)
set()
: O(U), where U is the number of cells that depend on the updated cell (in the worst case, it could be O(H * W)).get()
: O(1)sum()
: O(V + U), where V is the number of cells in the formula's range and U is the number of dependent cells.