Domain Understanding
RT is a read-heavy aggregation platform. Millions of people read movie scores every day. Very few people write reviews compared to readers. This means the schema must be optimized for reads, accepting some write complexity to achieve that.
Entity Map
Full Schema with Comments
1CREATE TABLE movies (
2 id BIGINT AUTO_INCREMENT PRIMARY KEY,
3 -- BIGINT over INT: future-proofing for millions of movies
4 title VARCHAR(255) NOT NULL,
5 released_on DATE,
6 -- DATE not DATETIME: we only care about the day, not time of release
7 duration_minutes INT,
8 synopsis TEXT,
9 -- TEXT not VARCHAR: synopses can be long; TEXT has no 65535 byte row limit issue
10 poster_url VARCHAR(500),
11
12 -- Audit fields: mandatory on every production table
13 created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
14 updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
15 ON UPDATE CURRENT_TIMESTAMP,
16 -- DATETIME not TIMESTAMP: TIMESTAMP overflows on Jan 19, 2038 (32-bit integer)
17
18 deleted_at DATETIME NULL
19 -- NULL = active row. NOT NULL = soft-deleted. Never hard DELETE.
20);
21
22-- Genre lookup table. Rows rarely change after initial seed.
23CREATE TABLE genres (
24 id BIGINT AUTO_INCREMENT PRIMARY KEY,
25 name VARCHAR(100) NOT NULL UNIQUE
26 -- UNIQUE: prevents two 'Drama' rows
27);
28
29-- M:M junction — one movie has many genres, one genre belongs to many movies
30CREATE TABLE movie_genres (
31 movie_id BIGINT NOT NULL,
32 genre_id BIGINT NOT NULL,
33
34 PRIMARY KEY (movie_id, genre_id),
35 -- Composite PK does two things:
36 -- 1. Enforces uniqueness: same genre can't be tagged twice on one movie
37 -- 2. Creates a clustered index on (movie_id, genre_id) for fast JOINs
38
39 FOREIGN KEY (movie_id) REFERENCES movies(id),
40 FOREIGN KEY (genre_id) REFERENCES genres(id)
41);
42
43-- One table for all people: directors, actors, writers
44-- Role belongs on the RELATIONSHIP (movie_persons), not on the entity
45CREATE TABLE persons (
46 id BIGINT AUTO_INCREMENT PRIMARY KEY,
47 full_name VARCHAR(255) NOT NULL,
48 birth_date DATE,
49 bio TEXT,
50 photo_url VARCHAR(500)
51);
52
53CREATE TABLE movie_persons (
54 id BIGINT AUTO_INCREMENT PRIMARY KEY,
55 -- Surrogate PK instead of composite (movie_id, person_id) because:
56 -- the same person can have MULTIPLE roles in the same movie
57 -- e.g. Nolan is both DIRECTOR and WRITER on Interstellar
58 -- A composite PK of (movie_id, person_id) would reject the second row
59
60 movie_id BIGINT NOT NULL,
61 person_id BIGINT NOT NULL,
62 role ENUM('DIRECTOR','ACTOR','WRITER','PRODUCER') NOT NULL,
63 -- ENUM restricts to a controlled list — no free-text typos
64
65 FOREIGN KEY (movie_id) REFERENCES movies(id),
66 FOREIGN KEY (person_id) REFERENCES persons(id)
67);
68
69CREATE TABLE users (
70 id BIGINT AUTO_INCREMENT PRIMARY KEY,
71 email VARCHAR(255) NOT NULL UNIQUE,
72 -- UNIQUE enforced at DB level — not just application code
73 display_name VARCHAR(100) NOT NULL,
74 created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
75 deleted_at DATETIME NULL
76 -- Soft delete: deleted users' reviews must remain for score integrity
77);
78
79CREATE TABLE critics (
80 id BIGINT AUTO_INCREMENT PRIMARY KEY,
81 full_name VARCHAR(255) NOT NULL,
82 publication VARCHAR(255),
83 -- NULLABLE: a freelance critic may not belong to any publication
84 is_approved BOOLEAN NOT NULL DEFAULT FALSE
85 -- RT manually approves critics before reviews count toward Tomatometer
86 -- Default FALSE: new critics unapproved until RT reviews their credentials
87);
88
89CREATE TABLE critic_reviews (
90 id BIGINT AUTO_INCREMENT PRIMARY KEY,
91 movie_id BIGINT NOT NULL,
92 critic_id BIGINT NOT NULL,
93 is_fresh BOOLEAN NOT NULL,
94 -- TRUE = Fresh, FALSE = Rotten
95 -- Tomatometer = (fresh_count / total_count) * 100
96 review_text TEXT,
97 -- NULLABLE: RT sometimes only captures Fresh/Rotten verdict, not full text
98 source_url VARCHAR(500),
99 -- Link to original published review on critic's publication
100 reviewed_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
101
102 UNIQUE (movie_id, critic_id),
103 -- DB-level enforcement: one critic can review a movie exactly once
104 -- Application code alone is not enough — race conditions bypass it
105
106 FOREIGN KEY (movie_id) REFERENCES movies(id),
107 FOREIGN KEY (critic_id) REFERENCES critics(id)
108);
109
110CREATE TABLE audience_reviews (
111 id BIGINT AUTO_INCREMENT PRIMARY KEY,
112 movie_id BIGINT NOT NULL,
113 user_id BIGINT NOT NULL,
114 rating TINYINT NOT NULL CHECK (rating BETWEEN 1 AND 5),
115 -- TINYINT: stores 0-255, perfect for a 1-5 scale, minimal storage
116 -- CHECK constraint: DB rejects any rating outside 1-5 at insert time
117 review_text TEXT,
118 reviewed_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
119
120 UNIQUE (movie_id, user_id),
121 -- Same principle: one review per user per movie, DB-enforced
122
123 FOREIGN KEY (movie_id) REFERENCES movies(id),
124 FOREIGN KEY (user_id) REFERENCES users(id)
125);
126
127CREATE TABLE watchlists (
128 user_id BIGINT NOT NULL,
129 movie_id BIGINT NOT NULL,
130 added_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
131
132 PRIMARY KEY (user_id, movie_id),
133 -- Leading column is user_id: queries like "all movies for user X"
134 -- are served by this clustered index — no separate index needed
135
136 FOREIGN KEY (user_id) REFERENCES users(id),
137 FOREIGN KEY (movie_id) REFERENCES movies(id)
138);
139
140-- Precomputed by a background job. Never computed live on page load.
141CREATE TABLE movie_scores (
142 movie_id BIGINT PRIMARY KEY,
143 -- One row per movie. PK = FK. No separate id needed.
144
145 tomatometer_score TINYINT,
146 -- Percentage 0-100. NULL = not enough reviews yet.
147 audience_score TINYINT,
148 -- Audience Score = % of audience_reviews where rating >= 4
149
150 total_critic_reviews INT NOT NULL DEFAULT 0,
151 total_audience_reviews INT NOT NULL DEFAULT 0,
152 -- Stored counts avoid re-counting on every read
153
154 last_computed_at DATETIME NOT NULL,
155 -- Tells the UI how stale this score is
156
157 FOREIGN KEY (movie_id) REFERENCES movies(id)
158);
159
160-- Indexes: every FK column queried frequently needs one
161CREATE INDEX idx_critic_reviews_movie ON critic_reviews(movie_id);
162CREATE INDEX idx_audience_reviews_movie ON audience_reviews(movie_id);
163CREATE INDEX idx_movie_persons_movie ON movie_persons(movie_id);
164-- watchlists: no separate index needed — composite PK (user_id, movie_id) covers it
Key Design Decisions
Indexing Strategy
Veteran rule: Every index is a write tax. On every INSERT or UPDATE, MySQL must update every index on that table. Never add indexes "just in case." Add them only when you can name the exact query they serve.
Common Traps to Avoid
- genre VARCHAR on movies — A movie has multiple genres. Single column forces comma-separated strings which break 1NF and can't use indexes. Use a junction table.
- Merging critics and users — Single-table-inheritance anti-pattern. Different data shapes belong in different tables.
- Computing Tomatometer live — Aggregating millions of rows per request at scale kills the DB. Precompute into movie_scores.
- Enforcing uniqueness only in app code — Race conditions bypass app-level checks. Use DB UNIQUE constraint — the only atomic guarantee.
- Using TIMESTAMP for audit fields — Overflows January 19, 2038. Use DATETIME.
- Hard-deleting reviews — Corrupts Tomatometer scores and may violate legal retention requirements. Always soft-delete.
Interview Q&A
Q: Why did you separate critic reviews and audience reviews?
They have fundamentally different data shapes and business rules. Critic reviews are binary (Fresh/Rotten) and feed the Tomatometer. Audience reviews are numeric (1–5) and feed the Audience Score. Merging them creates nullable columns on every row — the single-table-inheritance anti-pattern.
Q: How would the Tomatometer score be calculated?
Not live on page load. A background job runs periodically, queries critic_reviews for movies with new reviews, computes the fresh percentage, and writes it into movie_scores. The movie detail page reads one row — O(1). This is materialized aggregation, the standard pattern for read-heavy computed metrics.
Q: How do you prevent a user from reviewing the same movie twice?
UNIQUE(movie_id, user_id) constraint at the DB level. Application checks can be bypassed by race conditions. The DB constraint is atomic and absolute.
Q: What happens if a critic's account is deleted?
Soft delete the critic row with deleted_at. Do not cascade-delete their reviews — those reviews contributed to Tomatometer scores. Deleting them corrupts historical data. Reviews stay. Critic is flagged inactive.
Q: How does this schema scale at 100x traffic?
The precomputed movie_scores table already handles read scale — no aggregation at runtime. Further: read replicas for SELECT load, Redis cache in front of movie_scores with short TTL, and hash partitioning on critic_reviews and audience_reviews once those tables exceed 100M rows.
Q: Why does movie_persons use a surrogate key instead of a composite PK?
Same person can have multiple roles in one movie. A composite PK of (movie_id, person_id) would reject a second row for a person who is both director and writer. The surrogate key allows multiple rows for the same person-movie pair, differentiated by the role column.
ANKI Cards
Q: What is materialized aggregation and when do you use it?
A: Precomputing expensive aggregations (like COUNT, percentage) via a background job and storing the result in a dedicated table. Use when the aggregation is read-heavy but the underlying data changes infrequently enough that slight staleness is acceptable.
Q: Why is a UNIQUE constraint safer than an application-level duplicate check?
A: Two simultaneous requests can both pass an app-level check before either commits (race condition). A DB UNIQUE constraint is atomic — the second insert is rejected regardless of what the application does.
Q: Why use DATETIME instead of TIMESTAMP in MySQL?
A: TIMESTAMP is a 32-bit Unix integer that overflows on January 19, 2038. DATETIME is 64-bit with no such limitation. Always use DATETIME for audit fields in production systems.