From d30f8497e4ff84b1f4ac48a524e898b085cfff17 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?C=C3=A9lestin=20Matte?= Date: Thu, 3 Feb 2022 14:22:35 +0100 Subject: [PATCH 1/2] Merge schema.sql into Django's model Move schema.sql content into models.py. Delete schema.sql. Adds specific postgres components that cannot be handled by Django's ORM using RunSQL() in a migration file. In the process, allow message.parentid to be null. --- .../migrations/0005_merge_schema.py | 162 ++++++++++++++++++ django/archives/mailarchives/models.py | 59 ++++++- loader/sql/schema.sql | 160 ----------------- 3 files changed, 218 insertions(+), 163 deletions(-) create mode 100644 django/archives/mailarchives/migrations/0005_merge_schema.py delete mode 100644 loader/sql/schema.sql diff --git a/django/archives/mailarchives/migrations/0005_merge_schema.py b/django/archives/mailarchives/migrations/0005_merge_schema.py new file mode 100644 index 0000000..b5ce31f --- /dev/null +++ b/django/archives/mailarchives/migrations/0005_merge_schema.py @@ -0,0 +1,162 @@ +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('mailarchives', '0004_resend_rate_limit'), + ] + + operations = [ + migrations.AlterField( + model_name='message', + name='parentid', + field=models.ForeignKey(db_column='parentid', on_delete=django.db.models.deletion.CASCADE, to='mailarchives.Message', null=True), + ), + migrations.AddField( + model_name='message', + name='rawtxt', + field=models.BinaryField(blank=True, null=False), + ), + migrations.AddField( + model_name='attachment', + name='attachment', + field=models.BinaryField(blank=True, null=False), + ), + migrations.CreateModel( + name='Loaderrors', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('dat', models.DateTimeField(auto_now_add=True)), + ('msgid', models.TextField()), + ('srctype', models.TextField()), + ('src', models.TextField()), + ('err', models.TextField()), + ('listid', models.ForeignKey(db_column='listid', on_delete=django.db.models.deletion.CASCADE, to='mailarchives.List')), + ], + options={ + 'db_table': 'loaderrors', + }, + ), + migrations.CreateModel( + name='UnresolvedMessages', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('priority', models.IntegerField()), + ('msgid', models.TextField()), + ('message', models.ForeignKey(db_column='message', on_delete=django.db.models.deletion.CASCADE, to='mailarchives.Message')), + ], + options={ + 'db_table': 'unresolved_messages', + 'unique_together': {('message', 'priority', 'msgid')}, + }, + ), + migrations.CreateModel( + name='ListThreads', + fields=[ + ('threadid', models.IntegerField(primary_key=True, serialize=False)), + ('listid', models.ForeignKey(db_column='listid', on_delete=django.db.models.deletion.CASCADE, to='mailarchives.List')), + ], + options={ + 'db_table': 'list_threads', + 'unique_together': {('threadid', 'listid')}, + }, + ), + migrations.CreateModel( + name='ListMonths', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('year', models.IntegerField()), + ('month', models.IntegerField()), + ('listid', models.ForeignKey(db_column='listid', on_delete=django.db.models.deletion.CASCADE, to='mailarchives.List')), + ], + options={ + 'db_table': 'list_months', + 'unique_together': {('listid', 'year', 'month')}, + }, + ), + migrations.CreateModel( + name='Legacymap', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('year', models.IntegerField()), + ('month', models.IntegerField()), + ('msgnum', models.IntegerField()), + ('msgid', models.TextField()), + ('listid', models.ForeignKey(db_column='listid', on_delete=django.db.models.deletion.CASCADE, to='mailarchives.List')), + ], + options={ + 'db_table': 'legacymap', + 'unique_together': {('listid', 'year', 'month', 'msgnum')}, + }, + ), + migrations.RunSQL("ALTER TABLE messages ADD COLUMN fti tsvector NOT NULL"), + migrations.RunSQL("CREATE INDEX idx_messages_threadid ON messages(threadid);"), + migrations.RunSQL("CREATE UNIQUE INDEX idx_messages_msgid ON messages(messageid);"), + migrations.RunSQL("CREATE INDEX idx_messages_date ON messages(date);"), + migrations.RunSQL("CREATE INDEX idx_messages_parentid ON messages(parentid);"), + migrations.RunSQL("CREATE SEQUENCE threadid_seq;"), + migrations.RunSQL("CREATE UNIQUE INDEX idx_unresolved_msgid_message ON unresolved_messages(msgid, message);"), + migrations.RunSQL("CREATE INDEX list_threads_listid_idx ON list_threads(listid);"), + migrations.RunSQL("CREATE INDEX idx_attachments_msg ON attachments(message);"), + migrations.RunSQL(""" +CREATE TEXT SEARCH CONFIGURATION pg (PARSER=tsparser); + +CREATE TEXT SEARCH DICTIONARY english_ispell ( + TEMPLATE = ispell, + DictFile = en_us, + AffFile = en_us, + StopWords = english +); +CREATE TEXT SEARCH DICTIONARY pg_dict ( + TEMPLATE = synonym, + SYNONYMS = pg_dict +); +CREATE TEXT SEARCH DICTIONARY pg_stop ( + TEMPLATE = simple, + StopWords = pg_dict +); +ALTER TEXT SEARCH CONFIGURATION pg + ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, + word, hword, hword_part + WITH pg_stop, pg_dict, english_ispell, english_stem; +ALTER TEXT SEARCH CONFIGURATION pg + DROP MAPPING FOR email, url, url_path, sfloat, float; + +CREATE FUNCTION messages_fti_trigger_func() RETURNS trigger AS $$ +BEGIN + NEW.fti = setweight(to_tsvector('public.pg', coalesce(new.subject, '')), 'A') || + setweight(to_tsvector('public.pg', coalesce(new.bodytxt, '')), 'D'); + RETURN NEW; +END + +$$ LANGUAGE 'plpgsql'; + +CREATE TRIGGER messages_fti_trigger + BEFORE INSERT OR UPDATE OF subject, bodytxt ON messages + FOR EACH ROW EXECUTE PROCEDURE messages_fti_trigger_func(); +CREATE INDEX messages_fti_idx ON messages USING gin(fti); + +/* Simple API for hiding messages */ +CREATE OR REPLACE FUNCTION hide_message(msgid_txt text, reason_code integer, user_txt text, reason_txt text) + RETURNS integer AS +$BODY$ +DECLARE + returned_id integer; +BEGIN + UPDATE messages SET hiddenstatus = reason_code WHERE messageid = msgid_txt RETURNING id INTO returned_id; + + IF NOT FOUND THEN + RAISE EXCEPTION 'The specified message (%) could not be found.', msgid_txt; + END IF; + + INSERT INTO message_hide_reasons (message, dt, reason, by) VALUES (returned_id, now(), reason_txt, user_txt); + + RETURN returned_id; +END; +$BODY$ + LANGUAGE plpgsql VOLATILE + COST 100; +""") + ] diff --git a/django/archives/mailarchives/models.py b/django/archives/mailarchives/models.py index 44c4469..6c7ed41 100644 --- a/django/archives/mailarchives/models.py +++ b/django/archives/mailarchives/models.py @@ -24,8 +24,8 @@ class Message(models.Model): date = models.DateTimeField(null=False) messageid = models.TextField(null=False) bodytxt = models.TextField(null=False) - # rawtxt is a bytea field, which django doesn't support (easily) - parentid = models.IntegerField(null=False, blank=False) + rawtxt = models.BinaryField(null=True) + parentid = models.ForeignKey("self", on_delete=models.CASCADE, db_column='parentid', null=True) has_attachment = models.BooleanField(null=False, default=False) hiddenstatus = models.IntegerField(null=True) # fti is a tsvector field, which django doesn't support (easily) @@ -101,7 +101,7 @@ class Attachment(models.Model): message = models.ForeignKey(Message, null=False, blank=False, db_column='message', on_delete=models.CASCADE) filename = models.CharField(max_length=1000, null=False, blank=False) contenttype = models.CharField(max_length=1000, null=False, blank=False) - # attachment = bytea, not supported by django at this point + attachment = models.BinaryField(null=True) class Meta: db_table = 'attachments' @@ -141,3 +141,56 @@ class ResendMessage(models.Model): class LastResentMessage(models.Model): sentto = models.OneToOneField(User, null=False, blank=False, primary_key=True, on_delete=models.CASCADE) sentat = models.DateTimeField(null=False, blank=False) + + +class UnresolvedMessages(models.Model): + message = models.ForeignKey(Message, on_delete=models.CASCADE) + priority = models.IntegerField(null=False, blank=False) + msgid = models.TextField(null=False) + + class Meta: + db_table = 'unresolved_messages' + unique_together = (('message', 'priority', 'msgid'),) + + +class ListMonths(models.Model): + listid = models.ForeignKey('List', on_delete=models.CASCADE, db_column='listid') + year = models.IntegerField(null=False) + month = models.IntegerField(null=False) + + class Meta: + db_table = 'list_months' + unique_together = (('listid', 'year', 'month'),) + + +class ListThreads(models.Model): + threadid = models.IntegerField(primary_key=True) + listid = models.ForeignKey('List', on_delete=models.CASCADE, db_column='listid') + + class Meta: + db_table = 'list_threads' + unique_together = (('threadid', 'listid'),) + + +class Loaderrors(models.Model): + listid = models.ForeignKey(List, null=False, blank=False, on_delete=models.CASCADE, db_column='listid') + dat = models.DateTimeField(null=False, auto_now_add=True) + msgid = models.TextField(null=False) + srctype = models.TextField(null=False) + src = models.TextField(null=False) + err = models.TextField(null=False) + + class Meta: + db_table = 'loaderrors' + + +class Legacymap(models.Model): + listid = models.ForeignKey(List, null=False, blank=False, on_delete=models.CASCADE, db_column='listid') + year = models.IntegerField(null=False) + month = models.IntegerField(null=False) + msgnum = models.IntegerField(null=False) + msgid = models.TextField(null=False) + + class Meta: + db_table = 'legacymap' + unique_together = (('listid', 'year', 'month', 'msgnum'),) diff --git a/loader/sql/schema.sql b/loader/sql/schema.sql deleted file mode 100644 index be735d9..0000000 --- a/loader/sql/schema.sql +++ /dev/null @@ -1,160 +0,0 @@ -\set ON_ERROR_STOP on - -BEGIN; - -CREATE TABLE messages ( - id SERIAL NOT NULL PRIMARY KEY, - parentid int REFERENCES messages, - threadid int NOT NULL, - _from text NOT NULL, - _to text NOT NULL, - cc text NOT NULL, - subject text NOT NULL, - date timestamptz NOT NULL, - loaddate timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP, - has_attachment boolean NOT NULL, - hiddenstatus int NULL, - messageid text NOT NULL, - bodytxt text NOT NULL, - rawtxt bytea NOT NULL, - fti tsvector NOT NULL -); -CREATE INDEX idx_messages_threadid ON messages(threadid); -CREATE UNIQUE INDEX idx_messages_msgid ON messages(messageid); -CREATE INDEX idx_messages_date ON messages(date); -CREATE INDEX idx_messages_parentid ON messages(parentid); - -CREATE TABLE message_hide_reasons ( - message int NOT NULL PRIMARY KEY REFERENCES messages, - dt timestamptz, - reason text, - by text -); - -CREATE SEQUENCE threadid_seq; - -CREATE TABLE unresolved_messages( - message int NOT NULL REFERENCES messages, - priority int NOT NULL, - msgid text NOT NULL, - CONSTRAINT unresolved_messages_pkey PRIMARY KEY (message, priority) -); - -CREATE UNIQUE INDEX idx_unresolved_msgid_message ON unresolved_messages(msgid, message); - -CREATE TABLE listgroups( - groupid int NOT NULL PRIMARY KEY, - groupname text NOT NULL UNIQUE, - sortkey int NOT NULL -); - -CREATE TABLE lists( - listid int NOT NULL PRIMARY KEY, - listname text NOT NULL UNIQUE, - shortdesc text NOT NULL, - description text NOT NULL, - active boolean NOT NULL, - groupid int NOT NULL REFERENCES listgroups(groupid) -); - -CREATE TABLE list_months( - listid int NOT NULL REFERENCES lists(listid), - year int NOT NULL, - month int NOT NULL, - CONSTRAINT list_months_pk PRIMARY KEY (listid, year, month) -); - -CREATE TABLE list_threads( - threadid int NOT NULL, /* comes from threadid_seq */ - listid int NOT NULL REFERENCES lists(listid), - CONSTRAINT pg_list_threads PRIMARY KEY (threadid, listid) -); -CREATE INDEX list_threads_listid_idx ON list_threads(listid); - -CREATE TABLE attachments( - id serial not null primary key, - message int not null references messages(id), - filename text not null, - contenttype text not null, - attachment bytea not null -); -CREATE INDEX idx_attachments_msg ON attachments(message); - -CREATE TABLE loaderrors( - id SERIAL NOT NULL PRIMARY KEY, - listid int NOT NULL, - dat timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP, - msgid text NOT NULL, - srctype text NOT NULL, - src text NOT NULL, - err text NOT NULL -); - -/* textsearch configs */ -CREATE TEXT SEARCH CONFIGURATION pg (PARSER=tsparser); - -CREATE TEXT SEARCH DICTIONARY english_ispell ( - TEMPLATE = ispell, - DictFile = en_us, - AffFile = en_us, - StopWords = english -); -CREATE TEXT SEARCH DICTIONARY pg_dict ( - TEMPLATE = synonym, - SYNONYMS = pg_dict -); -CREATE TEXT SEARCH DICTIONARY pg_stop ( - TEMPLATE = simple, - StopWords = pg_dict -); -ALTER TEXT SEARCH CONFIGURATION pg - ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, - word, hword, hword_part - WITH pg_stop, pg_dict, english_ispell, english_stem; -ALTER TEXT SEARCH CONFIGURATION pg - DROP MAPPING FOR email, url, url_path, sfloat, float; - -CREATE FUNCTION messages_fti_trigger_func() RETURNS trigger AS $$ -BEGIN - NEW.fti = setweight(to_tsvector('public.pg', coalesce(new.subject, '')), 'A') || - setweight(to_tsvector('public.pg', coalesce(new.bodytxt, '')), 'D'); - RETURN NEW; -END -$$ LANGUAGE 'plpgsql'; - -CREATE TRIGGER messages_fti_trigger - BEFORE INSERT OR UPDATE OF subject, bodytxt ON messages - FOR EACH ROW EXECUTE PROCEDURE messages_fti_trigger_func(); -CREATE INDEX messages_fti_idx ON messages USING gin(fti); - -CREATE TABLE legacymap( - listid int not null, - year int not null, - month int not null, - msgnum int not null, - msgid text not null, -CONSTRAINT legacymap_pk PRIMARY KEY (listid, year, month, msgnum) -); - -/* Simple API for hiding messages */ -CREATE OR REPLACE FUNCTION hide_message(msgid_txt text, reason_code integer, user_txt text, reason_txt text) - RETURNS integer AS -$BODY$ -DECLARE - returned_id integer; -BEGIN - UPDATE messages SET hiddenstatus = reason_code WHERE messageid = msgid_txt RETURNING id INTO returned_id; - - IF NOT FOUND THEN - RAISE EXCEPTION 'The specified message (%) could not be found.', msgid_txt; - END IF; - - INSERT INTO message_hide_reasons (message, dt, reason, by) VALUES (returned_id, now(), reason_txt, user_txt); - - RETURN returned_id; -END; -$BODY$ - LANGUAGE plpgsql VOLATILE - COST 100; - -\echo Dont forget to commit! -- 2.34.1