Personal tools
You are here: Home Projects SETL SETL2 Source code DB_records.stl
Document Actions

DB_records.stl

by Paul McJones last modified 2021-02-25 11:38

"Fourth disk record variant, allowing cumulants to be kept in parents."

-- file disk_records_refct.stl
-- ***********************************************************************************************
-- ***************** fourth disk record variant, allowing cumulants to be kept in parents  *********
-- ***********************************************************************************************

	-- This variant of the disk record packages provides the collection of differently structured,
	-- automatically paging disk record objects needed for the database application. Six different
	-- variants are provided, distinguished by the value of their common rectype byte, which defines
	-- the record type and determines whether the record is 'compound' or 'non-compound'. The 'objects'
	-- of the preceding version of this package are systematically replaced by 4-byte record identifiers
	-- exactly corresponding to 32-bit record numbers.

package disk_records_pak;		-- common package for various kinds of disk records

const rec_size := 128;					-- byte size of records; initally set small to test swapping
										-- NOTE: the first record is reserved for master info about the file
var code_pts;                                                   -- code points to be traversed  ******* TESTING ONLY *******

procedure clear_memory();				-- reset the recno_generator, thus clearing almost all records
procedure check_memory();				-- read the amount of memory currently in use
										-- NOTE: ******* TESTING ONLY ******* 
procedure set_is_compound(rec,isc);		-- set the is_compound flag for rec
procedure set_type(rec,typ);			-- set the type of a record

procedure report_points_passed();               -- report on code points not traversed  ******* TESTING ONLY *******

procedure pass(pt);                                     -- traverse a code point******* TESTING ONLY *******


end disk_records_pak;

package db_records;		-- this package provides basic operations for the six classes of record 'objects'
						-- used in the database application. These are:
							-- (0) string pieces for big strings
							-- (1) string pieces for record occurence strings
							-- (2) bigstring nodes (simple and compound)
							-- (3) record_occurences bigstring nodes (simple and compound)
							-- (4) word index B_tree nodes (simple and compound)
							-- (5) database cumulative record-start index B_tree nodes (simple and compound)

const string_record := "\x00";					-- string record record type
	-- the layout of this record type is: 2 lead byte number of chars; remaining bytes are chars
	-- (125 of these) total 128 bytes
const sr_ncr_1 := 2,sr_ncr_2 := 3,sr_char_start := 4;		-- start and end of number of characters field
const sr_low_lim := 63, sr_hi_lim := 2 * sr_low_lim - 1;                -- min and max number of allowed characters


const wdoccs_string_record := "\x01";			-- string pieces for record occurence strings
	-- the layout of this record type is: 1 lead byte number of occs; remaining bytes 4-byte record identifiers
	-- (30 of these) total 122 bytes
const wo_nr_1 := 3,wo_nr_2 := 3,wo_occs_start := 4;		-- start and end of number of occurences field
const wos_low_lim := 4, wos_hi_lim := 2 * wos_low_lim;          -- min and max number of allowed occurences


const bigstr_node_record := "\x82";				-- bigstring nodes. These have character count as their sole cumulator
const bigstr_node_ncr := "\x02";							-- non-compound bigstring nodes.

const bnr_low_lim := 7,bnr_hi_lim := 2 * bnr_low_lim - 1;		-- min and max number of allowed children
const bnr_ch_start := 3,bnr_cum_start := 59,bnr_cum_end := 128;
	-- layout is: 14 children (4 bytes each);14 cumulants children (5 bytes each); total 128 bytes.

const wdoccs_str_node_record := "\x83";		-- record_occurences bigstring nodes.
													-- These have item count and rightmost record key as cumulators
const wdoccs_str_node_ncr := "\x03";					-- non-compound record_occurences bigstring nodes.

const wo_low_lim := 4,wo_hi_lim := 2 * wo_low_lim;			-- min and max number of allowed children
const wo_ch_start := 3,wo_cum_start := 39,wo_cum2_start := 84,wo_cum2_end := 119;
	-- layout is: 9 children (4 bytes each), 9 cumulated number of occurences (5 bytes each),
	-- 9 rightmost occuring id in child (4 bytes each); total 119 bytes.

const wd_index_node_record := "\x84";		-- word index B_tree nodes.
													-- These have item count and rightmost word as cumulators
const wd_index_node_ncr := "\x04";						-- non-compound word index B_tree nodes.

const wix_low_lim := 3,wix_hi_lim := 2 * wix_low_lim - 1;			-- min and max number of allowed children
	-- layout is: cum_occs (5 bytes each, 6 of these, 30 bytes), word_length and start word_start (12 bytes each,
	-- 6 of these, 72 bytes); children (4 bytes each, 6 of these, 24 bytes); total 128 bytes.
const wix_ch_start := 3,wix_cum_start := 27,wix_cum2_start := 57,wix_cum2_end := 128;

const wixnc_low_lim := 3,wixnc_hi_lim := 2 * wixnc_low_lim;			-- min and max number of allowed children
	-- layout is: cum_occs (5 bytes each, 7 of these, 35 bytes), word_length and start word_start (12 bytes each,
	-- 7 of these, 84 bytes) total 121 bytes.
const wixnc_ch_start := 3,wixnc_cum_start := 3,wixnc_cum2_start := 38,wixnc_cum2_end := 121;

const db_index_node_record := "\x85";			-- database cumulative record-start index B_tree nodes.
													-- These have item count and rightmost word as cumulators
const db_index_node_ncr := "\x05";						-- non-compound database record-start index B_tree nodes.
const dbix_low_lim := 4,dbix_hi_lim := 2 * dbix_low_lim;		-- min and max number of allowed children
	-- layout is: cum_length (5 bytes each, 9 of these, 45 bytes), last key  (4 bytes each, 9 of these, 36 bytes)
	-- children (4 bytes each, 9 of these, 36 bytes); total 119 bytes.
const dbix_ch_start := 3,dbix_cum_start := 39,dbix_cum2_start := 84,dbix_cum2_end := 119;

const dbnc_low_lim := 7,dbnc_hi_lim := 2 * dbnc_low_lim - 1;		-- min and max number of allowed children
	-- layout is: cum_length (5 bytes each, 14 of these, 70 bytes), last key  (4 bytes each, 14 of these, 56 bytes)
	-- total 128 bytes.
const dbnc_ch_start := 3,dbnc_cum_start := 3,dbnc_cum2_start := 73,dbnc_cum2_end := 128;

const type_byte := 1;							-- record byte containing its type
const nch_byte := 2;							-- record byte containing its number of children

			-- ******** routines applicable to all nodes ********

	procedure num_childr(rec);				-- number of children of node
	procedure set_num_childr(rec,n);		-- set number of children of node

		-- ******** special routines for string_record nodes ********

	procedure sr_length(rec);			-- length of active part of a record
	procedure sr_slice(rec,i,j);			-- get slice of a record
	procedure sr_set_slice(rw rec,i,j,stg);	-- set slice of record

		-- ******** special routines for wdoccs_string_record nodes ********

	procedure wo_length(rec);			-- length of active part of a record
	procedure wo_slice(rec,i,j);			-- get slice of a wd occurence record
	procedure wo_set_slice(rw rec,i,j,stg);	-- set slice of wd occurence record

		-- ******** special routines for bigstr_node_record nodes ********
				-- none, but see 'increfs'
				
		-- ******** special routines for wdoccs_str_node_record nodes ********
				-- none, but see 'increfs'

		-- ******** special routines for wd_index_node_record nodes ********
				-- none, but see 'increfs'

		-- ******** special routines for db_index_node_record nodes ********
				-- none, but see 'increfs'

end db_records;

package body disk_records_pak;		-- common package for various kinds of disk records
use byteutil,string_utility_pak,db_records,setldb;		-- common package for reference count manipulation

var code_pts_passed := {};

procedure pass(pt); code_pts_passed with:= pt; end pass;
procedure report_points_passed();       -- report on code points not traversed

        if nn := #(not_passed := code_pts - code_pts_passed) = 0 then
                print("all ",#code_pts," points passed");
        else
                print(nn," points not passed: ",not_passed);
        end if;

end report_points_passed;


procedure clear_memory();				-- reset the recno_generator, thus clearing almost all records
	return 0;
										-- NOTE: ******* TESTING ONLY ******* 
	old_recno_generator := recno_generator;
	recno_generator := 10;		-- keep the first few records, which are special
	free_list := [];			-- clear the free list
	in_use := {rn: rn in in_use | (rn(1..3) = "\x00\x00\x00" and abs(rn(4)) <= 10)};
	return old_recno_generator;
	
end clear_memory;

procedure check_memory();				-- read the amount of memory currently in use
	return dr_check_memory();

	if (used := recno_generator - #free_list) < 50 then 
		return [used,{hexify(x): x in in_use},[hexify(x): x in free_list]]; 
	end if;

	return used;	-- the records allocated, less those freed

end check_memory;


procedure set_type(rec,typ);		-- set the type of a record

	stg := dr_load(rec);		-- force load; then set the type byte
	stg(type_byte) := typ;
	dr_setrecbuf(rec,stg);
	dr_dirtify(rec);
	return rec;				-- return the record

end set_type;

procedure set_is_compound(rec,isc);		-- set the is_compound flag for this node
	tb := (stg := dr_load(rec))(type_byte);		-- force load; then set the first character of the record
	ic := abs(tb) >= 128;
	if isc = ic then return; end if;
	stg(type_byte) := if isc then char(abs(tb) + 128) else char(abs(tb) - 128) end if;
	dr_setrecbuf(rec,stg);
	dr_dirtify(rec);
	
end set_is_compound;

end disk_records_pak;

package body db_records;		-- this package provides basic operations for the six classes of record 'objects'
								-- used in the database application.

use byteutil,disk_records_pak,string_utility_pak,setldb;
	-- the layout of this record type is: 2 lead bytes string length; remaining bytes are characters
	-- total 64 bytes

procedure num_childr(rec);				-- number of children of node

	stg := dr_load(rec);					-- ensure that the record is loaded

	--print(" #REC ",#rec);
	--print(" REC ",int_of_4(rec));
	--db_debug(stg);
	return abs(stg(nch_byte)); 

end num_childr;

procedure set_num_childr(rec,n);				-- set number of children of node

	stg := dr_load(rec);					-- ensure that the record is loaded
	stg(nch_byte) := char(n);	-- set the number of children
	dr_setrecbuf(rec,stg);
	dr_dirtify(rec);

end set_num_childr;

		-- ******** special routines for string_record nodes ********

	procedure sr_length(rec);			-- length of active part of a record
		stg := dr_load(rec);		-- load this recno into RAM
		return int_from_bytes(stg(sr_ncr_1..sr_ncr_2));	-- convert and return the length field
	end;
	
	procedure sr_slice(rec,i,j);			-- get slice of a record
	
		stg := dr_load(rec);		-- load this recno into RAM
		rec_len := int_from_bytes(stg(sr_ncr_1..sr_ncr_2));
		return stg(sr_char_start + i - 1..(sr_char_start + j - 1) min rec_size);
		 	-- return string section, but truncate to available part
	
	end;
	
	procedure sr_set_slice(rw rec,i,j,stg);	-- set slice of record
		
		if i < 1 or j < i - 1 then 
			abort("***** ILLEGAL SLICE BOUNDARIES IN DISK_RECORDS STRING ASSIGNMENT ***** " + str(i) + " " + str(j)); 
		end if;

		stg_now := dr_load(rec);			-- load this record into RAM
										-- replace rec with a fresh copy if necessary 
		if refcount(int_of_4(rec)) > 1 then						-- must copy
			new_r := dr_new_rec(); dr_setrecbuf(new_r,stg_now);
			dr_dirtify(new_r);
			incref(rec,-1); rec := new_r;		-- substitute copy for original  -- [increfs(new_r,1); not applicable]
		end if;
	
		rec_len := int_from_bytes(stg_now(sr_ncr_1..sr_ncr_2));		-- present length
		erased := j - i + 1; inserted := #stg;
		
		rec_len +:= (inserted - erased);
		
		if rec_len + 2 > rec_size then 
			abort("***** RECORD BOUNDARY OVERSTEPPED IN DISK_RECORDS STRING ASSIGNMENT ***** "
							 + str(i) + " " + str(j) + " " + str(rec_len)); 
		end if;
	
		stg_now(sr_ncr_1..sr_ncr_2)  := bytes_from_int(rec_len); 
		stg_now(sr_char_start + i - 1..sr_char_start + j - 1) := stg;
		if #stg_now > rec_size then stg_now := stg_now(1..rec_size); end if;		--  cut to allowed length
		dr_setrecbuf(rec,stg_now);
		dr_dirtify(rec);		-- note that the record has been changed
		
	end;
	
		-- ******** special routines for wdoccs_string_record nodes ********

	procedure wo_length(rec);			-- length of active part of a record
		stg := dr_load(rec);		-- load this recno into RAM
		return int_from_byte(stg(wo_nr_1..wo_nr_2));	-- convert and return the length field
	end wo_length;

	procedure wo_slice(rec,i,j);			-- get slice of a wd occurence record

		stg := dr_load(rec);		-- load this recno into RAM
		rec_len := int_from_byte(stg(wo_nr_1..wo_nr_2));
		return stg(wo_occs_start + 4 * i - 4..(wo_occs_start + 4 * j - 1) min rec_size);
		 	-- return string section, but truncate to available part

	end wo_slice;

	procedure wo_set_slice(rw rec,i,j,stg);	-- set slice of wd occurence record
	
		if i < 1 or j < i - 1 then 
			abort("***** ILLEGAL SLICE BOUNDARIES IN DISK_RECORDS STRING ASSIGNMENT ***** " + str(i) + " " + str(j)); 
		end if;
		
		stg_now := dr_load(rec);		-- load this record into RAM
	
										-- replace rec with a fresh copy if necessary 
		if refcount(int_of_4(rec)) > 1 then						-- must copy
			new_r := dr_new_rec(); dr_setrecbuf(new_r,stg_now);
			dr_dirtify(new_r);
			incref(rec,-1); rec := new_r;		-- substitute copy for original  -- [increfs(new_r,1); not applicable]
		end if;
	
		rec_len := int_from_byte(stg_now(wo_nr_1..wo_nr_2));		-- present length
		erased := (j - i + 1); inserted := #stg/4;
		
		rec_len +:= (inserted - erased);
		
		if 4*rec_len + 2 > rec_size then 
			abort("***** RECORD BOUNDARY OVERSTEPPED IN DISK_RECORDS STRING ASSIGNMENT ***** "
							 + str(i) + " " + str(j) + " " + str(rec_len)); 
		end if;
	
		stg_now(wo_nr_1..wo_nr_2)  := byte_from_int(rec_len); 
		stg_now(wo_occs_start + 4 * i - 4..wo_occs_start + 4 * j - 1) := stg;

		if (nsn := #stg_now) > rec_size then 
			stg_now := stg_now(1..rec_size); 		--  cut to allowed length
		elseif nsn < rec_size then 			--  pad to required length
			stg_now := stg_now + (rec_size - nsn) * "\x00"; 
		end if;

		dr_setrecbuf(rec,stg_now);			-- copy back to the buffer
		dr_dirtify(rec);		-- note that the record has been changed

	end wo_set_slice;


		-- ******** special routines for bigstr_node_record nodes ********

		-- ******** special routines for wdoccs_str_node_record nodes ********

		-- ******** special routines for wd_index_node_record nodes ********

		-- ******** special routines for db_index_node_record nodes ********

end db_records;

program test;		-- test program for disk_records class

use db_records,byteutil,string_utility_pak,disk_records_pak,setldb;

recs := [make_string_record(): j in [1..10]];

print("records created");
dr_flush_all();		-- force flush of all the records
print("flush_all done");

for r = recs(j) loop 
	sr_set_slice(r,1,0,8 * ("Hello World" + str(j) + ". ")); recs(j) := r;
	print(sr_length(r)," ",sr_slice(r,1,sr_length(r)));
end loop;

the_tup := [ ]; r := recs(1);

for j in [1..10] loop
	incref(r,1);		-- increment the refcount of this object
	sr_set_slice(r,1,5,"");
	the_tup with:= r;
end loop;

for j in [1..10] loop
	incref(r,1);		-- increment the refcount of this object
	sr_set_slice(r,1,0,"XXXXX");
	the_tup with:= r;
end loop;

for j in [1..20] loop
	print("copy: ",j," ",sr_length(the_tup(j))," ",sr_slice(the_tup(j),1,sr_length(the_tup(j))));
end loop;

procedure make_string_record();		-- create an empty string record

	set_type(rec := dr_new_rec(),string_record); return rec;
	
end make_string_record;

end test;

« April 2024 »
Su Mo Tu We Th Fr Sa
1 2 3 4 5 6
7 8 9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30
 

Powered by Plone CMS, the Open Source Content Management System

This site conforms to the following standards: