1181624Skmacy/******************************************************************************
2181624Skmacy * blkif.h
3231743Sgibbs *
4181624Skmacy * Unified block-device I/O interface for Xen guest OSes.
5231743Sgibbs *
6181624Skmacy * Permission is hereby granted, free of charge, to any person obtaining a copy
7181624Skmacy * of this software and associated documentation files (the "Software"), to
8181624Skmacy * deal in the Software without restriction, including without limitation the
9181624Skmacy * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10181624Skmacy * sell copies of the Software, and to permit persons to whom the Software is
11181624Skmacy * furnished to do so, subject to the following conditions:
12181624Skmacy *
13181624Skmacy * The above copyright notice and this permission notice shall be included in
14181624Skmacy * all copies or substantial portions of the Software.
15181624Skmacy *
16181624Skmacy * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17181624Skmacy * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18181624Skmacy * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19181624Skmacy * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20181624Skmacy * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21181624Skmacy * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22181624Skmacy * DEALINGS IN THE SOFTWARE.
23181624Skmacy *
24181624Skmacy * Copyright (c) 2003-2004, Keir Fraser
25231743Sgibbs * Copyright (c) 2012, Spectra Logic Corporation
26181624Skmacy */
27181624Skmacy
28181624Skmacy#ifndef __XEN_PUBLIC_IO_BLKIF_H__
29181624Skmacy#define __XEN_PUBLIC_IO_BLKIF_H__
30181624Skmacy
31251767Sgibbs#include "ring.h"
32251767Sgibbs#include "../grant_table.h"
33181624Skmacy
34181624Skmacy/*
35181624Skmacy * Front->back notifications: When enqueuing a new request, sending a
36181624Skmacy * notification can be made conditional on req_event (i.e., the generic
37181624Skmacy * hold-off mechanism provided by the ring macros). Backends must set
38181624Skmacy * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
39231743Sgibbs *
40181624Skmacy * Back->front notifications: When enqueuing a new response, sending a
41181624Skmacy * notification can be made conditional on rsp_event (i.e., the generic
42181624Skmacy * hold-off mechanism provided by the ring macros). Frontends must set
43181624Skmacy * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
44181624Skmacy */
45181624Skmacy
46181624Skmacy#ifndef blkif_vdev_t /* an includer may pre-define its own type */
47181624Skmacy#define blkif_vdev_t   uint16_t /* default type of blkif_request.handle */
48181624Skmacy#endif
49181624Skmacy#define blkif_sector_t uint64_t /* type of blkif_request.sector_number */
50181624Skmacy
51181624Skmacy/*
52231743Sgibbs * Feature and Parameter Negotiation
53231743Sgibbs * =================================
54231743Sgibbs * The two halves of a Xen block driver utilize nodes within the XenStore to
55231743Sgibbs * communicate capabilities and to negotiate operating parameters.  This
56231743Sgibbs * section enumerates these nodes which reside in the respective front and
57231743Sgibbs * backend portions of the XenStore, following the XenBus convention.
58231743Sgibbs *
59231743Sgibbs * All data in the XenStore is stored as strings.  Nodes specifying numeric
60231743Sgibbs * values are encoded in decimal.  Integer value ranges listed below are
61231743Sgibbs * expressed as fixed sized integer types capable of storing the conversion
62288917Sroyger * of a properly formatted node string, without loss of information.
63231743Sgibbs *
64231743Sgibbs * Any specified default value is in effect if the corresponding XenBus node
65231743Sgibbs * is not present in the XenStore.
66231743Sgibbs *
67231743Sgibbs * XenStore nodes in sections marked "PRIVATE" are solely for use by the
68231743Sgibbs * driver side whose XenBus tree contains them.
69231743Sgibbs *
70232308Sgibbs * XenStore nodes marked "DEPRECATED" in their notes section should only be
71232308Sgibbs * used to provide interoperability with legacy implementations.
72232308Sgibbs *
73231743Sgibbs * See the XenBus state transition diagram below for details on when XenBus
74231743Sgibbs * nodes must be published and when they can be queried.
75231743Sgibbs *
76231743Sgibbs *****************************************************************************
77231743Sgibbs *                            Backend XenBus Nodes
78231743Sgibbs *****************************************************************************
79231743Sgibbs *
80231743Sgibbs *------------------ Backend Device Identification (PRIVATE) ------------------
81231743Sgibbs *
82231743Sgibbs * mode
83231743Sgibbs *      Values:         "r" (read only), "w" (writable)
84231743Sgibbs *
85231743Sgibbs *      The read or write access permissions to the backing store to be
86231743Sgibbs *      granted to the frontend.
87231743Sgibbs *
88231743Sgibbs * params
89231743Sgibbs *      Values:         string
90231743Sgibbs *
91288917Sroyger *      A free formatted string providing sufficient information for the
92288917Sroyger *      backend driver to open the backing device.  (e.g. the path to the
93288917Sroyger *      file or block device representing the backing store.)
94231743Sgibbs *
95288917Sroyger * physical-device
96288917Sroyger *      Values:         "MAJOR:MINOR"
97288917Sroyger *
98288917Sroyger *      MAJOR and MINOR are the major number and minor number of the
99288917Sroyger *      backing device respectively.
100288917Sroyger *
101231743Sgibbs * type
102231743Sgibbs *      Values:         "file", "phy", "tap"
103231743Sgibbs *
104231743Sgibbs *      The type of the backing device/object.
105231743Sgibbs *
106286062Scperciva *
107286062Scperciva * direct-io-safe
108286062Scperciva *      Values:         0/1 (boolean)
109286062Scperciva *      Default Value:  0
110286062Scperciva *
111286062Scperciva *      The underlying storage is not affected by the direct IO memory
112286062Scperciva *      lifetime bug.  See:
113286062Scperciva *        http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
114286062Scperciva *
115286062Scperciva *      Therefore this option gives the backend permission to use
116286062Scperciva *      O_DIRECT, notwithstanding that bug.
117286062Scperciva *
118286062Scperciva *      That is, if this option is enabled, use of O_DIRECT is safe,
119286062Scperciva *      in circumstances where we would normally have avoided it as a
120286062Scperciva *      workaround for that bug.  This option is not relevant for all
121286062Scperciva *      backends, and even not necessarily supported for those for
122286062Scperciva *      which it is relevant.  A backend which knows that it is not
123286062Scperciva *      affected by the bug can ignore this option.
124286062Scperciva *
125286062Scperciva *      This option doesn't require a backend to use O_DIRECT, so it
126286062Scperciva *      should not be used to try to control the caching behaviour.
127286062Scperciva *
128231743Sgibbs *--------------------------------- Features ---------------------------------
129231743Sgibbs *
130231743Sgibbs * feature-barrier
131231743Sgibbs *      Values:         0/1 (boolean)
132231743Sgibbs *      Default Value:  0
133231743Sgibbs *
134231743Sgibbs *      A value of "1" indicates that the backend can process requests
135231743Sgibbs *      containing the BLKIF_OP_WRITE_BARRIER request opcode.  Requests
136231743Sgibbs *      of this type may still be returned at any time with the
137231743Sgibbs *      BLKIF_RSP_EOPNOTSUPP result code.
138231743Sgibbs *
139231743Sgibbs * feature-flush-cache
140231743Sgibbs *      Values:         0/1 (boolean)
141231743Sgibbs *      Default Value:  0
142231743Sgibbs *
143231743Sgibbs *      A value of "1" indicates that the backend can process requests
144231743Sgibbs *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode.  Requests
145231743Sgibbs *      of this type may still be returned at any time with the
146231743Sgibbs *      BLKIF_RSP_EOPNOTSUPP result code.
147231743Sgibbs *
148231743Sgibbs * feature-discard
149231743Sgibbs *      Values:         0/1 (boolean)
150231743Sgibbs *      Default Value:  0
151231743Sgibbs *
152231743Sgibbs *      A value of "1" indicates that the backend can process requests
153231743Sgibbs *      containing the BLKIF_OP_DISCARD request opcode.  Requests
154231743Sgibbs *      of this type may still be returned at any time with the
155231743Sgibbs *      BLKIF_RSP_EOPNOTSUPP result code.
156231743Sgibbs *
157286062Scperciva * feature-persistent
158286062Scperciva *      Values:         0/1 (boolean)
159286062Scperciva *      Default Value:  0
160286062Scperciva *      Notes: 7
161286062Scperciva *
162286062Scperciva *      A value of "1" indicates that the backend can keep the grants used
163286062Scperciva *      by the frontend driver mapped, so the same set of grants should be
164286062Scperciva *      used in all transactions. The maximum number of grants the backend
165286062Scperciva *      can map persistently depends on the implementation, but ideally it
166286062Scperciva *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
167286062Scperciva *      feature the backend doesn't need to unmap each grant, preventing
168286062Scperciva *      costly TLB flushes. The backend driver should only map grants
169286062Scperciva *      persistently if the frontend supports it. If a backend driver chooses
170286062Scperciva *      to use the persistent protocol when the frontend doesn't support it,
171286062Scperciva *      it will probably hit the maximum number of persistently mapped grants
172286062Scperciva *      (due to the fact that the frontend won't be reusing the same grants),
173286062Scperciva *      and fall back to non-persistent mode. Backend implementations may
174286062Scperciva *      shrink or expand the number of persistently mapped grants without
175286062Scperciva *      notifying the frontend depending on memory constraints (this might
176286062Scperciva *      cause a performance degradation).
177286062Scperciva *
178286062Scperciva *      If a backend driver wants to limit the maximum number of persistently
179286062Scperciva *      mapped grants to a value less than RING_SIZE *
180286062Scperciva *      BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to
181286062Scperciva *      discard the grants that are less commonly used. Using a LRU in the
182286062Scperciva *      backend driver paired with a LIFO queue in the frontend will
183286062Scperciva *      allow us to have better performance in this scenario.
184286062Scperciva *
185231743Sgibbs *----------------------- Request Transport Parameters ------------------------
186231743Sgibbs *
187231743Sgibbs * max-ring-page-order
188231743Sgibbs *      Values:         <uint32_t>
189231743Sgibbs *      Default Value:  0
190231743Sgibbs *      Notes:          1, 3
191231743Sgibbs *
192231743Sgibbs *      The maximum supported size of the request ring buffer in units of
193231743Sgibbs *      lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
194231743Sgibbs *      etc.).
195231743Sgibbs *
196231743Sgibbs * max-ring-pages
197231743Sgibbs *      Values:         <uint32_t>
198231743Sgibbs *      Default Value:  1
199232308Sgibbs *      Notes:          DEPRECATED, 2, 3
200231743Sgibbs *
201231743Sgibbs *      The maximum supported size of the request ring buffer in units of
202231743Sgibbs *      machine pages.  The value must be a power of 2.
203231743Sgibbs *
204231743Sgibbs *------------------------- Backend Device Properties -------------------------
205231743Sgibbs *
206286062Scperciva * discard-enable
207286062Scperciva *      Values:         0/1 (boolean)
208286062Scperciva *      Default Value:  1
209286062Scperciva *
210286062Scperciva *      This optional property, set by the toolstack, requests that the
211286062Scperciva *      backend offer, or not offer, discard to the frontend.
212286062Scperciva *
213286062Scperciva *      If the property is missing, the backend should offer discard if
214286062Scperciva *      the backing storage actually supports it.
215286062Scperciva *
216232308Sgibbs * discard-alignment
217231743Sgibbs *      Values:         <uint32_t>
218231743Sgibbs *      Default Value:  0
219231743Sgibbs *      Notes:          4, 5
220231743Sgibbs *
221231743Sgibbs *      The offset, in bytes from the beginning of the virtual block device,
222231743Sgibbs *      to the first, addressable, discard extent on the underlying device.
223231743Sgibbs *
224231743Sgibbs * discard-granularity
225231743Sgibbs *      Values:         <uint32_t>
226231743Sgibbs *      Default Value:  <"sector-size">
227231743Sgibbs *      Notes:          4
228231743Sgibbs *
229231743Sgibbs *      The size, in bytes, of the individually addressable discard extents
230231743Sgibbs *      of the underlying device.
231231743Sgibbs *
232231743Sgibbs * discard-secure
233231743Sgibbs *      Values:         0/1 (boolean)
234231743Sgibbs *      Default Value:  0
235286062Scperciva *      Notes:          10
236231743Sgibbs *
237231743Sgibbs *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
238231743Sgibbs *      requests with the BLKIF_DISCARD_SECURE flag set.
239231743Sgibbs *
240231743Sgibbs * info
241231743Sgibbs *      Values:         <uint32_t> (bitmap)
242231743Sgibbs *
243231743Sgibbs *      A collection of bit flags describing attributes of the backing
244231743Sgibbs *      device.  The VDISK_* macros define the meaning of each bit
245231743Sgibbs *      location.
246231743Sgibbs *
247231743Sgibbs * sector-size
248231743Sgibbs *      Values:         <uint32_t>
249231743Sgibbs *
250286062Scperciva *      The logical sector size, in bytes, of the backend device.
251231743Sgibbs *
252286062Scperciva * physical-sector-size
253286062Scperciva *      Values:         <uint32_t>
254286062Scperciva *
255286062Scperciva *      The physical sector size, in bytes, of the backend device.
256286062Scperciva *
257231743Sgibbs * sectors
258231743Sgibbs *      Values:         <uint64_t>
259231743Sgibbs *
260286062Scperciva *      The size of the backend device, expressed in units of its logical
261231743Sgibbs *      sector size ("sector-size").
262231743Sgibbs *
263231743Sgibbs *****************************************************************************
264231743Sgibbs *                            Frontend XenBus Nodes
265231743Sgibbs *****************************************************************************
266231743Sgibbs *
267231743Sgibbs *----------------------- Request Transport Parameters -----------------------
268231743Sgibbs *
269231743Sgibbs * event-channel
270231743Sgibbs *      Values:         <uint32_t>
271231743Sgibbs *
272231743Sgibbs *      The identifier of the Xen event channel used to signal activity
273231743Sgibbs *      in the ring buffer.
274231743Sgibbs *
275231743Sgibbs * ring-ref
276231743Sgibbs *      Values:         <uint32_t>
277231743Sgibbs *      Notes:          6
278231743Sgibbs *
279231743Sgibbs *      The Xen grant reference granting permission for the backend to map
280231743Sgibbs *      the sole page in a single page sized ring buffer.
281231743Sgibbs *
282231743Sgibbs * ring-ref%u
283231743Sgibbs *      Values:         <uint32_t>
284231743Sgibbs *      Notes:          6
285231743Sgibbs *
286232308Sgibbs *      For a frontend providing a multi-page ring, a "number of ring pages"
287232308Sgibbs *      sized list of nodes, each containing a Xen grant reference granting
288231743Sgibbs *      permission for the backend to map the page of the ring located
289231743Sgibbs *      at page index "%u".  Page indexes are zero based.
290231743Sgibbs *
291231743Sgibbs * protocol
292231743Sgibbs *      Values:         string (XEN_IO_PROTO_ABI_*)
293231743Sgibbs *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
294231743Sgibbs *
295231743Sgibbs *      The machine ABI rules governing the format of all ring request and
296231743Sgibbs *      response structures.
297231743Sgibbs *
298231743Sgibbs * ring-page-order
299231743Sgibbs *      Values:         <uint32_t>
300231743Sgibbs *      Default Value:  0
301231743Sgibbs *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
302231743Sgibbs *      Notes:          1, 3
303231743Sgibbs *
304231743Sgibbs *      The size of the frontend allocated request ring buffer in units
305231743Sgibbs *      of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
306231743Sgibbs *      etc.).
307231743Sgibbs *
308231743Sgibbs * num-ring-pages
309231743Sgibbs *      Values:         <uint32_t>
310231743Sgibbs *      Default Value:  1
311231743Sgibbs *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
312232308Sgibbs *      Notes:          DEPRECATED, 2, 3
313231743Sgibbs *
314231743Sgibbs *      The size of the frontend allocated request ring buffer in units of
315231743Sgibbs *      machine pages.  The value must be a power of 2.
316231743Sgibbs *
317286062Scperciva * feature-persistent
318286062Scperciva *      Values:         0/1 (boolean)
319286062Scperciva *      Default Value:  0
320286062Scperciva *      Notes: 7, 8, 9
321286062Scperciva *
322286062Scperciva *      A value of "1" indicates that the frontend will reuse the same grants
323286062Scperciva *      for all transactions, allowing the backend to map them with write
324286062Scperciva *      access (even when it should be read-only). If the frontend hits the
325286062Scperciva *      maximum number of allowed persistently mapped grants, it can fall back
326286062Scperciva *      to non-persistent mode. This will cause a performance degradation,
327300050Seadler *      since the backend driver will still try to map those grants
328286062Scperciva *      persistently. Since the persistent grants protocol is compatible with
329286062Scperciva *      the previous protocol, a frontend driver can choose to work in
330286062Scperciva *      persistent mode even when the backend doesn't support it.
331286062Scperciva *
332286062Scperciva *      It is recommended that the frontend driver stores the persistently
333286062Scperciva *      mapped grants in a LIFO queue, so a subset of all persistently mapped
334286062Scperciva *      grants gets used commonly. This is done in case the backend driver
335286062Scperciva *      decides to limit the maximum number of persistently mapped grants
336286062Scperciva *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
337286062Scperciva *
338231743Sgibbs *------------------------- Virtual Device Properties -------------------------
339231743Sgibbs *
340231743Sgibbs * device-type
341231743Sgibbs *      Values:         "disk", "cdrom", "floppy", etc.
342231743Sgibbs *
343231743Sgibbs * virtual-device
344231743Sgibbs *      Values:         <uint32_t>
345231743Sgibbs *
346231743Sgibbs *      A value indicating the physical device to virtualize within the
347231743Sgibbs *      frontend's domain.  (e.g. "The first ATA disk", "The third SCSI
348231743Sgibbs *      disk", etc.)
349231743Sgibbs *
350231743Sgibbs *      See docs/misc/vbd-interface.txt for details on the format of this
351231743Sgibbs *      value.
352231743Sgibbs *
353231743Sgibbs * Notes
354231743Sgibbs * -----
355231743Sgibbs * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
356231743Sgibbs *     PV drivers.
357286062Scperciva * (2) Multi-page ring buffer scheme first used in some RedHat distributions
358231743Sgibbs *     including a distribution deployed on certain nodes of the Amazon
359231743Sgibbs *     EC2 cluster.
360231743Sgibbs * (3) Support for multi-page ring buffers was implemented independently,
361286062Scperciva *     in slightly different forms, by both Citrix and RedHat/Amazon.
362231743Sgibbs *     For full interoperability, block front and backends should publish
363231743Sgibbs *     identical ring parameters, adjusted for unit differences, to the
364231743Sgibbs *     XenStore nodes used in both schemes.
365286062Scperciva * (4) Devices that support discard functionality may internally allocate space
366286062Scperciva *     (discardable extents) in units that are larger than the exported logical
367286062Scperciva *     block size. If the backing device has such discardable extents the
368286062Scperciva *     backend should provide both discard-granularity and discard-alignment.
369286062Scperciva *     Providing just one of the two may be considered an error by the frontend.
370286062Scperciva *     Backends supporting discard should include discard-granularity and
371286062Scperciva *     discard-alignment even if it supports discarding individual sectors.
372286062Scperciva *     Frontends should assume discard-alignment == 0 and discard-granularity
373286062Scperciva *     == sector size if these keys are missing.
374231743Sgibbs * (5) The discard-alignment parameter allows a physical device to be
375231743Sgibbs *     partitioned into virtual devices that do not necessarily begin or
376231743Sgibbs *     end on a discardable extent boundary.
377231743Sgibbs * (6) When there is only a single page allocated to the request ring,
378231743Sgibbs *     'ring-ref' is used to communicate the grant reference for this
379231743Sgibbs *     page to the backend.  When using a multi-page ring, the 'ring-ref'
380231743Sgibbs *     node is not created.  Instead 'ring-ref0' - 'ring-refN' are used.
381286062Scperciva * (7) When using persistent grants, data has to be copied from/to the page
382286062Scperciva *     where the grant is currently mapped. The overhead of doing this copy,
383286062Scperciva *     however, doesn't negate the speed improvement of not having to unmap
384286062Scperciva *     the grants.
385286062Scperciva * (8) The frontend driver has to allow the backend driver to map all grants
386286062Scperciva *     with write access, even when they should be mapped read-only, since
387286062Scperciva *     further requests may reuse these grants and require write permissions.
388286062Scperciva * (9) Linux implementation doesn't have a limit on the maximum number of
389286062Scperciva *     grants that can be persistently mapped in the frontend driver, but
390286062Scperciva *     due to the frontend driver implementation it should never be bigger
391286062Scperciva *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
392286062Scperciva *(10) The discard-secure property may be present and will be set to 1 if the
393286062Scperciva *     backing device supports secure discard.
394231743Sgibbs */
395231743Sgibbs
396231743Sgibbs/*
397231743Sgibbs * STATE DIAGRAMS
398231743Sgibbs *
399231743Sgibbs *****************************************************************************
400231743Sgibbs *                                   Startup                                 *
401231743Sgibbs *****************************************************************************
402231743Sgibbs *
403231743Sgibbs * Tool stack creates front and back nodes with state XenbusStateInitialising.
404231743Sgibbs *
405231743Sgibbs * Front                                Back
406231743Sgibbs * =================================    =====================================
407231743Sgibbs * XenbusStateInitialising              XenbusStateInitialising
408231743Sgibbs *  o Query virtual device               o Query backend device identification
409231743Sgibbs *    properties.                          data.
410231743Sgibbs *  o Setup OS device instance.          o Open and validate backend device.
411231743Sgibbs *                                       o Publish backend features and
412231743Sgibbs *                                         transport parameters.
413231743Sgibbs *                                                      |
414231743Sgibbs *                                                      |
415231743Sgibbs *                                                      V
416231743Sgibbs *                                      XenbusStateInitWait
417231743Sgibbs *
418231743Sgibbs * o Query backend features and
419231743Sgibbs *   transport parameters.
420231743Sgibbs * o Allocate and initialize the
421231743Sgibbs *   request ring.
422231743Sgibbs * o Publish transport parameters
423231743Sgibbs *   that will be in effect during
424231743Sgibbs *   this connection.
425231743Sgibbs *              |
426231743Sgibbs *              |
427231743Sgibbs *              V
428231743Sgibbs * XenbusStateInitialised
429231743Sgibbs *
430231743Sgibbs *                                       o Query frontend transport parameters.
431231743Sgibbs *                                       o Connect to the request ring and
432231743Sgibbs *                                         event channel.
433231743Sgibbs *                                       o Publish backend device properties.
434231743Sgibbs *                                                      |
435231743Sgibbs *                                                      |
436231743Sgibbs *                                                      V
437231743Sgibbs *                                      XenbusStateConnected
438231743Sgibbs *
439231743Sgibbs *  o Query backend device properties.
440231743Sgibbs *  o Finalize OS virtual device
441231743Sgibbs *    instance.
442231743Sgibbs *              |
443231743Sgibbs *              |
444231743Sgibbs *              V
445231743Sgibbs * XenbusStateConnected
446231743Sgibbs *
447231743Sgibbs * Note: Drivers that do not support any optional features, or the negotiation
448231743Sgibbs *       of transport parameters, can skip certain states in the state machine:
449231743Sgibbs *
450231743Sgibbs *       o A frontend may transition to XenbusStateInitialised without
451231743Sgibbs *         waiting for the backend to enter XenbusStateInitWait.  In this
452231743Sgibbs *         case, default transport parameters are in effect and any
453231743Sgibbs *         transport parameters published by the frontend must contain
454231743Sgibbs *         their default values.
455231743Sgibbs *
456231743Sgibbs *       o A backend may transition to XenbusStateInitialised, bypassing
457231743Sgibbs *         XenbusStateInitWait, without waiting for the frontend to first
458231743Sgibbs *         enter the XenbusStateInitialised state.  In this case, default
459231743Sgibbs *         transport parameters are in effect and any transport parameters
460231743Sgibbs *         published by the backend must contain their default values.
461231743Sgibbs *
462231743Sgibbs *       Drivers that support optional features and/or transport parameter
463231743Sgibbs *       negotiation must tolerate these additional state transition paths.
464231743Sgibbs *       In general this means performing the work of any skipped state
465231743Sgibbs *       transition, if it has not already been performed, in addition to the
466231743Sgibbs *       work associated with entry into the current state.
467231743Sgibbs */
468231743Sgibbs
469231743Sgibbs/*
470181624Skmacy * REQUEST CODES: values carried in blkif_request.operation.
471181624Skmacy */
472181624Skmacy#define BLKIF_OP_READ              0
473181624Skmacy#define BLKIF_OP_WRITE             1
474181624Skmacy/*
475231743Sgibbs * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
476231743Sgibbs * operation code ("barrier request") must be completed prior to the
477231743Sgibbs * execution of the barrier request.  All writes issued after the barrier
478231743Sgibbs * request must not execute until after the completion of the barrier request.
479231743Sgibbs *
480231743Sgibbs * Optional.  See "feature-barrier" XenBus node documentation above.
481181624Skmacy */
482181624Skmacy#define BLKIF_OP_WRITE_BARRIER     2
483183375Skmacy/*
484231743Sgibbs * Commit any uncommitted contents of the backing device's volatile cache
485231743Sgibbs * to stable storage.
486231743Sgibbs *
487231743Sgibbs * Optional.  See "feature-flush-cache" XenBus node documentation above.
488183375Skmacy */
489183375Skmacy#define BLKIF_OP_FLUSH_DISKCACHE   3
490231743Sgibbs/*
491231743Sgibbs * Used in SLES sources for a device-specific command packet
492231743Sgibbs * contained within the request. Reserved for that purpose.
493231743Sgibbs */
494231743Sgibbs#define BLKIF_OP_RESERVED_1        4
495231743Sgibbs/*
496231743Sgibbs * Indicate to the backend device that a region of storage is no longer in
497231743Sgibbs * use, and may be discarded at any time without impact to the client.  If
498231743Sgibbs * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
499231743Sgibbs * discarded region on the device must be rendered unrecoverable before the
500231743Sgibbs * command returns.
501231743Sgibbs *
502288917Sroyger * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
503231743Sgibbs * command on a native device.
504231743Sgibbs *
505231743Sgibbs * More information about trim/unmap operations can be found at:
506231743Sgibbs * http://t13.org/Documents/UploadedDocuments/docs2008/
507231743Sgibbs *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
508231743Sgibbs * http://www.seagate.com/staticfiles/support/disc/manuals/
509231743Sgibbs *     Interface%20manuals/100293068c.pdf
510231743Sgibbs *
511231743Sgibbs * Optional.  See "feature-discard", "discard-alignment",
512231743Sgibbs * "discard-granularity", and "discard-secure" in the XenBus node
513231743Sgibbs * documentation above.
514231743Sgibbs */
515231743Sgibbs#define BLKIF_OP_DISCARD           5
516181624Skmacy
517181624Skmacy/*
518286062Scperciva * Recognized if "feature-max-indirect-segments" is present in the backend
519286062Scperciva * xenbus info. The "feature-max-indirect-segments" node contains the maximum
520286062Scperciva * number of segments allowed by the backend per request. If the node is
521286062Scperciva * present, the frontend might use blkif_request_indirect structs in order to
522286062Scperciva * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
523286062Scperciva * maximum number of indirect segments is fixed by the backend, but the
524286062Scperciva * frontend can issue requests with any number of indirect segments as long as
525286062Scperciva * it's less than the number provided by the backend. The indirect_grefs field
526286062Scperciva * in blkif_request_indirect should be filled by the frontend with the
527286062Scperciva * grant references of the pages that are holding the indirect segments.
528286062Scperciva * These pages are filled with an array of blkif_request_segment that hold the
529286062Scperciva * information about the segments. The number of indirect pages to use is
530286062Scperciva * determined by the number of segments an indirect request contains. Every
531286062Scperciva * indirect page can contain a maximum of
532286062Scperciva * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
533286062Scperciva * calculate the number of indirect pages to use we have to do
534286062Scperciva * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
535286062Scperciva *
536286062Scperciva * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
537286062Scperciva * create the "feature-max-indirect-segments" node!
538286062Scperciva */
539286062Scperciva#define BLKIF_OP_INDIRECT          6
540286062Scperciva
541286062Scperciva/*
542284664Scperciva * Maximum scatter/gather segments per request; dimensions blkif_request.seg[].
543284664Scperciva * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
544284664Scperciva * NB. This could be 12 if the ring indexes weren't stored in the same page.
545214077Sgibbs */
546284296Sroyger#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
547214077Sgibbs
548231743Sgibbs/*
549286062Scperciva * Maximum number of indirect pages to use per BLKIF_OP_INDIRECT request.
550286062Scperciva */
551286062Scperciva#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
552286062Scperciva
553286062Scperciva/*
554231743Sgibbs * NB. first_sect and last_sect in blkif_request_segment, as well as
555231743Sgibbs * sector_number in blkif_request, are always expressed in 512-byte units.
556231743Sgibbs * However they must be properly aligned to the real sector size of the
557286062Scperciva * physical disk, which is reported in the "physical-sector-size" node in
558286062Scperciva * the backend xenbus info. Also the xenbus "sectors" node is expressed in
559286062Scperciva * 512-byte units.
560231743Sgibbs */
561181624Skmacystruct blkif_request_segment {
562181624Skmacy    grant_ref_t gref;        /* reference to I/O buffer frame        */
563181624Skmacy    /* @first_sect: first sector in frame to transfer (inclusive).   */
564181624Skmacy    /* @last_sect: last sector in frame to transfer (inclusive).     */
565181624Skmacy    uint8_t     first_sect, last_sect;
566181624Skmacy};
567181624Skmacy
568231743Sgibbs/*
569231743Sgibbs * Starting ring element for any I/O request.
570231743Sgibbs */
571181624Skmacystruct blkif_request {
572181624Skmacy    uint8_t        operation;    /* BLKIF_OP_???                         */
573181624Skmacy    uint8_t        nr_segments;  /* number of segments                   */
574181624Skmacy    blkif_vdev_t   handle;       /* only for read/write requests         */
575181624Skmacy    uint64_t       id;           /* private guest value, echoed in resp  */
576181624Skmacy    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
577288917Sroyger    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
578181624Skmacy};
579181624Skmacytypedef struct blkif_request blkif_request_t;
580181624Skmacy
581231743Sgibbs/*
582231743Sgibbs * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
583231743Sgibbs * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
584231743Sgibbs */
585231743Sgibbsstruct blkif_request_discard {
586231743Sgibbs    uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
587231743Sgibbs    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
588231743Sgibbs#define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0      */
589231743Sgibbs    blkif_vdev_t   handle;       /* same as for read/write requests      */
590231743Sgibbs    uint64_t       id;           /* private guest value, echoed in resp  */
591231743Sgibbs    blkif_sector_t sector_number;/* start sector idx on disk             */
592231743Sgibbs    uint64_t       nr_sectors;   /* number of contiguous sectors to discard*/
593231743Sgibbs};
594231743Sgibbstypedef struct blkif_request_discard blkif_request_discard_t;
595231743Sgibbs
596286062Scpercivastruct blkif_request_indirect {
597286062Scperciva    uint8_t        operation;    /* BLKIF_OP_INDIRECT                    */
598286062Scperciva    uint8_t        indirect_op;  /* BLKIF_OP_{READ/WRITE}                */
599286062Scperciva    uint16_t       nr_segments;  /* number of segments                   */
600286062Scperciva    uint64_t       id;           /* private guest value, echoed in resp  */
601286062Scperciva    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
602286062Scperciva    blkif_vdev_t   handle;       /* same as for read/write requests      */
603286062Scperciva    grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
604286062Scperciva#ifdef __i386__
605286062Scperciva    uint64_t       pad;          /* Make it 64 byte aligned on i386      */
606286062Scperciva#endif
607286062Scperciva};
608286062Scpercivatypedef struct blkif_request_indirect blkif_request_indirect_t;
609286062Scperciva
/*
 * Ring element written back for a request.
 */
struct blkif_response {
    /* Copied from the request. */
    uint64_t        id;
    /* Copied from the request. */
    uint8_t         operation;
    /* One of the BLKIF_RSP_??? values. */
    int16_t         status;
};
typedef struct blkif_response blkif_response_t;
616181624Skmacy
/*
 * STATUS RETURN CODES.
 *
 * Values carried in blkif_response.status. The negative values are
 * parenthesized so that each macro expands to one self-contained
 * expression whatever tokens surround its use.
 */
 /* Operation not supported (only happens on barrier writes). */
#define BLKIF_RSP_EOPNOTSUPP  (-2)
 /* Operation failed for some unspecified reason (-EIO). */
#define BLKIF_RSP_ERROR       (-1)
 /* Operation completed successfully. */
#define BLKIF_RSP_OKAY         0
626181624Skmacy
627181624Skmacy/*
628181624Skmacy * Generate blkif ring structures and types.
629181624Skmacy */
630181624SkmacyDEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
631181624Skmacy
/*
 * Virtual disk attribute values. Each one occupies a distinct bit, so they
 * can presumably be OR-ed together.
 * NOTE(review): confirm against the consumer of these flags.
 */
#define VDISK_CDROM     0x1
#define VDISK_REMOVABLE 0x2
#define VDISK_READONLY  0x4
635231743Sgibbs
636181624Skmacy#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
637181624Skmacy
638181624Skmacy/*
639181624Skmacy * Local variables:
640181624Skmacy * mode: C
641286062Scperciva * c-file-style: "BSD"
642181624Skmacy * c-basic-offset: 4
643181624Skmacy * tab-width: 4
644181624Skmacy * indent-tabs-mode: nil
645181624Skmacy * End:
646181624Skmacy */
647